
mindersec / minder / 11380841120 (push via github, web-flow)

17 Oct 2024 08:03AM UTC coverage: 54.717% (-0.08%) from 54.792%
Implement Minder TestKit (#4762)

* Implement Minder TestKit

TestKit is an implementation of several interfaces that allows for
easier testing. The intent is to be able to test rule types locally via
a programmatic framework, as opposed to relying on an integration test.

Signed-off-by: Juan Antonio Osorio <ozz@stacklok.com>

* Add REST ingester support

Signed-off-by: Juan Antonio Osorio <ozz@stacklok.com>

* Address feedback

Signed-off-by: Juan Antonio Osorio <ozz@stacklok.com>

* Change signature of rule parameters validation

It now takes a map, since that's easier to use.

Signed-off-by: Juan Antonio Osorio <ozz@stacklok.com>

---------

Signed-off-by: Juan Antonio Osorio <ozz@stacklok.com>
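
The TestKit described in the first commit note above supplies implementations of the provider interfaces that engine components (such as the diff ingester listed below) normally obtain from a live GitHub connection, so a rule type can be exercised from an ordinary Go test. A minimal sketch of what such a test might look like, assuming a hypothetical testkit package; the import path and constructor name are illustrative only, not the actual TestKit API introduced by this PR:

package diff_test

import (
        "context"
        "testing"

        "github.com/mindersec/minder/internal/engine/ingester/diff"
        pb "github.com/mindersec/minder/pkg/api/protobuf/go/minder/v1"

        // Hypothetical import path; the real TestKit package location is not
        // shown in this coverage report.
        "github.com/mindersec/minder/pkg/testkit"
)

func TestDiffIngesterWithTestKit(t *testing.T) {
        // Hypothetical constructor: a provider satisfying provifv1.GitHub,
        // backed by canned fixture data instead of live API calls.
        gh := testkit.NewGitHubProvider()

        ingester, err := diff.NewDiffIngester(&pb.DiffType{}, gh)
        if err != nil {
                t.Fatalf("creating diff ingester: %v", err)
        }

        res, err := ingester.Ingest(context.Background(), &pb.PullRequest{Number: 1}, nil)
        if err != nil {
                t.Fatalf("ingesting pull request: %v", err)
        }
        if res == nil {
                t.Fatal("expected a non-nil ingest result")
        }
}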

53 of 63 new or added lines in 21 files covered. (84.13%)

2 existing lines in 1 file now uncovered.

14941 of 27306 relevant lines covered (54.72%)

41.25 hits per line

Source File

/internal/engine/ingester/diff/diff.go: 47.65% covered
// SPDX-FileCopyrightText: Copyright 2023 The Minder Authors
// SPDX-License-Identifier: Apache-2.0

// Package diff provides the diff rule data ingest engine
package diff

import (
        "bufio"
        "cmp"
        "context"
        "fmt"
        "math"
        "path/filepath"
        "regexp"
        "slices"
        "strconv"
        "strings"

        "github.com/go-git/go-billy/v5"
        "github.com/go-git/go-billy/v5/helper/iofs"
        scalibr "github.com/google/osv-scalibr"
        "github.com/google/osv-scalibr/extractor"
        "github.com/google/osv-scalibr/extractor/filesystem/list"
        scalibr_fs "github.com/google/osv-scalibr/fs"
        scalibr_plugin "github.com/google/osv-scalibr/plugin"
        "github.com/google/osv-scalibr/purl"
        "github.com/rs/zerolog"
        "google.golang.org/protobuf/reflect/protoreflect"

        pbinternal "github.com/mindersec/minder/internal/proto"
        pb "github.com/mindersec/minder/pkg/api/protobuf/go/minder/v1"
        "github.com/mindersec/minder/pkg/engine/v1/interfaces"
        "github.com/mindersec/minder/pkg/entities/v1/checkpoints"
        provifv1 "github.com/mindersec/minder/pkg/providers/v1"
)

const (
        // DiffRuleDataIngestType is the type of the diff rule data ingest engine
        DiffRuleDataIngestType = "diff"
        prFilesPerPage         = 30
        wildcard               = "*"
)

// Diff is the diff rule data ingest engine
type Diff struct {
        cli provifv1.GitHub
        cfg *pb.DiffType
}

// NewDiffIngester creates a new diff ingester
func NewDiffIngester(
        cfg *pb.DiffType,
        cli provifv1.GitHub,
) (*Diff, error) {
        if cfg == nil {
                cfg = &pb.DiffType{}
        }

        if cli == nil {
                return nil, fmt.Errorf("provider is nil")
        }

        return &Diff{
                cfg: cfg,
                cli: cli,
        }, nil
}

// GetType returns the type of the diff rule data ingest engine
func (*Diff) GetType() string {
        return DiffRuleDataIngestType
}

// GetConfig returns the config for the diff rule data ingest engine
func (di *Diff) GetConfig() protoreflect.ProtoMessage {
        return di.cfg
}

// Ingest ingests a diff from a pull request in accordance with its type
//
//nolint:gocyclo
func (di *Diff) Ingest(
        ctx context.Context,
        ent protoreflect.ProtoMessage,
        _ map[string]any,
) (*interfaces.Result, error) {
        pr, ok := ent.(*pb.PullRequest)
        if !ok {
                return nil, fmt.Errorf("entity is not a pull request")
        }

        // The GitHub Go API takes an int32, but our proto stores an int64; make sure we don't overflow
        if pr.Number > math.MaxInt {
                return nil, fmt.Errorf("pr number is too large")
        }
        prNumber := int(pr.Number)

        switch di.cfg.GetType() {
        case "", pb.DiffTypeDep:
                return di.getDepTypeDiff(ctx, prNumber, pr)

        case pb.DiffTypeNewDeps:
                // TODO: once we've tested some, convert DiffTypeDep to use this algorithm.
                return di.getScalibrTypeDiff(ctx, prNumber, pr)

        case pb.DiffTypeFull:
                return di.getFullTypeDiff(ctx, prNumber, pr)

        default:
                return nil, fmt.Errorf("unknown diff type")
        }
}

func (di *Diff) getDepTypeDiff(ctx context.Context, prNumber int, pr *pb.PullRequest) (*interfaces.Result, error) {
        deps := pbinternal.PrDependencies{Pr: pr}
        page := 0

        for {
                prFiles, resp, err := di.cli.ListFiles(ctx, pr.RepoOwner, pr.RepoName, prNumber, prFilesPerPage, page)
                if err != nil {
                        return nil, fmt.Errorf("error getting pull request files: %w", err)
                }

                for _, file := range prFiles {
                        fileDiffs, err := di.ingestFileForDepDiff(file.GetFilename(), file.GetPatch(), file.GetRawURL(), *zerolog.Ctx(ctx))
                        if err != nil {
                                return nil, fmt.Errorf("error ingesting file %s: %w", file.GetFilename(), err)
                        }
                        deps.Deps = append(deps.Deps, fileDiffs...)
                }

                if resp.NextPage == 0 {
                        break
                }

                page = resp.NextPage
        }

        return &interfaces.Result{Object: &deps, Checkpoint: checkpoints.NewCheckpointV1Now()}, nil
}

func (di *Diff) getFullTypeDiff(ctx context.Context, prNumber int, pr *pb.PullRequest) (*interfaces.Result, error) {
        diff := &pbinternal.PrContents{Pr: pr}
        page := 0

        for {
                prFiles, resp, err := di.cli.ListFiles(ctx, pr.RepoOwner, pr.RepoName, prNumber, prFilesPerPage, page)
                if err != nil {
                        return nil, fmt.Errorf("error getting pull request files: %w", err)
                }

                for _, file := range prFiles {
                        fileDiffs, err := ingestFileForFullDiff(file.GetFilename(), file.GetPatch(), file.GetRawURL())
                        if err != nil {
                                return nil, fmt.Errorf("error ingesting file %s: %w", file.GetFilename(), err)
                        }
                        diff.Files = append(diff.Files, fileDiffs)
                }

                if resp.NextPage == 0 {
                        break
                }

                page = resp.NextPage
        }

        return &interfaces.Result{Object: diff, Checkpoint: checkpoints.NewCheckpointV1Now()}, nil
}

func (di *Diff) ingestFileForDepDiff(
        filename, patchContents, patchUrl string,
        logger zerolog.Logger,
) ([]*pbinternal.PrDependencies_ContextualDependency, error) {
        parser := di.getParserForFile(filename, logger)
        if parser == nil {
                return nil, nil
        }

        depBatch, err := parser(patchContents)
        if err != nil {
                return nil, fmt.Errorf("error parsing file %s: %w", filename, err)
        }

        batchCtxDeps := make([]*pbinternal.PrDependencies_ContextualDependency, 0, len(depBatch))
        for i := range depBatch {
                dep := depBatch[i]
                batchCtxDeps = append(batchCtxDeps, &pbinternal.PrDependencies_ContextualDependency{
                        Dep: dep,
                        File: &pbinternal.PrDependencies_ContextualDependency_FilePatch{
                                Name:     filename,
                                PatchUrl: patchUrl,
                        },
                })
        }

        return batchCtxDeps, nil
}

func (di *Diff) getScalibrTypeDiff(ctx context.Context, _ int, pr *pb.PullRequest) (*interfaces.Result, error) {
        deps := pbinternal.PrDependencies{Pr: pr}

        // TODO: we should be able to just fetch the additional commits between base and target.
        // Our current Git abstraction isn't quite powerful enough, so we do two shallow clones.

        baseInventory, err := di.scalibrInventory(ctx, pr.BaseCloneUrl, pr.BaseRef)
        if err != nil {
                return nil, fmt.Errorf("Failed to clone base from %s at %q: %w", pr.BaseCloneUrl, pr.BaseRef, err)
        }
        newInventory, err := di.scalibrInventory(ctx, pr.TargetCloneUrl, pr.TargetRef)
        if err != nil {
                return nil, fmt.Errorf("Failed to clone fork from %s at %q: %w", pr.TargetCloneUrl, pr.TargetRef, err)
        }

        newDeps := setDifference(baseInventory, newInventory, inventorySorter)

        deps.Deps = make([]*pbinternal.PrDependencies_ContextualDependency, 0, len(newDeps))
        for _, inventory := range newDeps {
                for _, filename := range inventory.Locations {
                        deps.Deps = append(deps.Deps, &pbinternal.PrDependencies_ContextualDependency{
                                Dep: &pbinternal.Dependency{
                                        Ecosystem: inventoryToEcosystem(inventory),
                                        Name:      inventory.Name,
                                        Version:   inventory.Version,
                                },
                                File: &pbinternal.PrDependencies_ContextualDependency_FilePatch{
                                        Name:     filename,
                                        PatchUrl: "", // TODO: do we need this?
                                },
                        })
                }
        }

        return &interfaces.Result{Object: &deps, Checkpoint: checkpoints.NewCheckpointV1Now()}, nil
}

func inventorySorter(a *extractor.Inventory, b *extractor.Inventory) int {
        // If we compare by name and version first, we can avoid serializing Locations to strings
        res := cmp.Or(cmp.Compare(a.Name, b.Name), cmp.Compare(a.Version, b.Version))
        if res != 0 {
                return res
        }
        // TODO: Locations should probably be sorted, but scalibr is going to export a compare function.
        aLoc := fmt.Sprintf("%v", a.Locations)
        bLoc := fmt.Sprintf("%v", b.Locations)
        return cmp.Compare(aLoc, bLoc)
}

func (di *Diff) scalibrInventory(ctx context.Context, repoURL string, ref string) ([]*extractor.Inventory, error) {
        clone, err := di.cli.Clone(ctx, repoURL, ref)
        if err != nil {
                return nil, err
        }

        tree, err := clone.Worktree()
        if err != nil {
                return nil, err
        }
        return scanFs(ctx, tree.Filesystem, map[string]string{})
}

func scanFs(ctx context.Context, memFS billy.Filesystem, _ map[string]string) ([]*extractor.Inventory, error) {
        // have to down-cast here, because scalibr needs multiple io/fs types
        wrapped, ok := iofs.New(memFS).(scalibr_fs.FS)
        if !ok {
                return nil, fmt.Errorf("error converting filesystem to ReadDirFS")
        }

        desiredCaps := scalibr_plugin.Capabilities{
                OS:            scalibr_plugin.OSLinux,
                Network:       true,
                DirectFS:      false,
                RunningSystem: false,
        }

        scalibrFs := scalibr_fs.ScanRoot{FS: wrapped}
        scanConfig := scalibr.ScanConfig{
                ScanRoots: []*scalibr_fs.ScanRoot{&scalibrFs},
                // All includes Ruby, Dotnet which we're not ready to test yet, so use the more limited Default set.
                FilesystemExtractors: list.FilterByCapabilities(list.Default, &desiredCaps),
                Capabilities:         &desiredCaps,
        }

        scanner := scalibr.New()
        scanResults := scanner.Scan(ctx, &scanConfig)

        if scanResults == nil || scanResults.Status == nil {
                return nil, fmt.Errorf("error scanning files: no results")
        }
        if scanResults.Status.Status != scalibr_plugin.ScanStatusSucceeded {
                return nil, fmt.Errorf("error scanning files: %s", scanResults.Status)
        }

        return scanResults.Inventories, nil
}

func inventoryToEcosystem(inventory *extractor.Inventory) pbinternal.DepEcosystem {
        if inventory == nil {
                zerolog.Ctx(context.Background()).Warn().Msg("nil ecosystem scanning diffs")
                return pbinternal.DepEcosystem_DEP_ECOSYSTEM_UNSPECIFIED
        }

        // This should be inventory.PURL()... but there isn't a convenience wrapper yet
        package_url, err := inventory.Extractor.ToPURL(inventory)
        if err != nil {
                zerolog.Ctx(context.Background()).Warn().Err(err).Msg("error getting ecosystem from inventory")
                return pbinternal.DepEcosystem_DEP_ECOSYSTEM_UNSPECIFIED
        }

        // Sometimes Scalibr uses the string "PyPI" instead of "pypi" when reporting the ecosystem.
        switch package_url.Type {
        // N.B. using an enum here arbitrarily restricts our ability to add new
        // ecosystems without a core minder change. Switching to strings a la
        // purl might be an improvement.
        case purl.TypePyPi:
                return pbinternal.DepEcosystem_DEP_ECOSYSTEM_PYPI
        case purl.TypeNPM:
                return pbinternal.DepEcosystem_DEP_ECOSYSTEM_NPM
        case purl.TypeGolang:
                return pbinternal.DepEcosystem_DEP_ECOSYSTEM_GO
        default:
                return pbinternal.DepEcosystem_DEP_ECOSYSTEM_UNSPECIFIED
        }
}

// ingestFileForFullDiff processes a given file's patch from a pull request.
// It scans through the patch line by line, identifying the changes made.
// If it's a hunk header, it extracts the starting line number. If it's an addition, it records the line content and its number.
// The function also increments the line number for context lines (lines that provide context but haven't been modified).
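// For example, given the patch
//
//     @@ -1,2 +10,3 @@
//      unchanged context
//     +added line
//      more context
//
// the hunk header sets the counter to 10, the first context line advances it to 11,
// and the added line is therefore recorded as {Content: "added line", LineNumber: 11}.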
func ingestFileForFullDiff(filename, patch, patchUrl string) (*pbinternal.PrContents_File, error) {
        var result []*pbinternal.PrContents_File_Line

        scanner := bufio.NewScanner(strings.NewReader(patch))
        regex := regexp.MustCompile(`@@ -\d+,\d+ \+(\d+),\d+ @@`)

        var currentLineNumber int64
        var err error
        for scanner.Scan() {
                line := scanner.Text()

                if matches := regex.FindStringSubmatch(line); matches != nil {
                        currentLineNumber, err = strconv.ParseInt(matches[1], 10, 32)
                        if err != nil {
                                return nil, fmt.Errorf("error parsing line number from the hunk header: %w", err)
                        }
                } else if strings.HasPrefix(line, "+") {
                        result = append(result, &pbinternal.PrContents_File_Line{
                                Content: line[1:],
                                // see the use of strconv.ParseInt above: this is a safe downcast
                                // nolint: gosec
                                LineNumber: int32(currentLineNumber),
                        })

                        currentLineNumber++
                } else if !strings.HasPrefix(line, "-") {
                        currentLineNumber++
                }
        }

        if err := scanner.Err(); err != nil {
                return nil, fmt.Errorf("error reading patch: %w", err)
        }

        return &pbinternal.PrContents_File{
                Name:         filename,
                FilePatchUrl: patchUrl,
                PatchLines:   result,
        }, nil
}

func (di *Diff) getEcosystemForFile(filename string) DependencyEcosystem {
        lastComponent := filepath.Base(filename)

        for _, ecoMapping := range di.cfg.Ecosystems {
                if match, _ := filepath.Match(ecoMapping.Depfile, lastComponent); match {
                        return DependencyEcosystem(ecoMapping.Name)
                }
        }
        return DepEcosystemNone
}

func (di *Diff) getParserForFile(filename string, logger zerolog.Logger) ecosystemParser {
        eco := di.getEcosystemForFile(filename)
        if eco == DepEcosystemNone {
                logger.Debug().
                        Str("filename", filename).
                        Msg("No ecosystem found, skipping")
                return nil
        }

        logger.Debug().
                Str("filename", filename).
                Str("package-ecosystem", string(eco)).
                Msg("matched ecosystem")

        return newEcosystemParser(eco)
}

// Computes the set of elements in updated which are not in base.
// Note: this function may permute (sort) the order of elements in base and updated.
func setDifference[Slice ~[]E, E any](base Slice, updated Slice, sorter func(a, b E) int) Slice {

        slices.SortFunc(base, sorter)
        slices.SortFunc(updated, sorter)

        baseIdx, newIdx := 0, 0
        ret := make(Slice, 0)
        for baseIdx < len(base) && newIdx < len(updated) {
                cmpResult := sorter(base[baseIdx], updated[newIdx])
                if cmpResult < 0 {
                        baseIdx++
                } else if cmpResult > 0 {
                        ret = append(ret, updated[newIdx])
                        newIdx++
                } else {
                        baseIdx++
                        newIdx++
                }
        }
        if newIdx < len(updated) {
                ret = append(ret, updated[newIdx:]...)
        }

        // TODO: add metric for number of deps scanned vs total deps

        return ret
}
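
The setDifference helper above drives the DiffTypeNewDeps path: it sorts both inventories with the supplied comparator and keeps only the entries of updated that do not appear in base. A small usage sketch, assuming it is exercised from another file inside this package (the function and the example below are unexported; exampleNewEntries is an illustrative name, not part of the original file):

package diff

import "cmp"

// exampleNewEntries illustrates setDifference: with these inputs it returns
// []string{"d", "e"}, the elements present in updated but not in base.
// Note that both input slices are sorted in place by the call.
func exampleNewEntries() []string {
        base := []string{"c", "a", "b"}
        updated := []string{"e", "b", "c", "d"}
        return setDifference(base, updated, cmp.Compare[string])
}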