mindersec / minder · build 14315603253

07 Apr 2025 05:28PM UTC coverage: 56.758% (-0.01%) from 56.772%

Build type: push · Service: github · Committed by: web-flow
build(deps): bump golangci/golangci-lint-action from 6.5.2 to 7.0.0 (#5548)

* build(deps): bump golangci/golangci-lint-action from 6.5.2 to 7.0.0

Bumps [golangci/golangci-lint-action](https://github.com/golangci/golangci-lint-action) from 6.5.2 to 7.0.0.
- [Release notes](https://github.com/golangci/golangci-lint-action/releases)
- [Commits](https://github.com/golangci/golangci-lint-action/compare/55c2c1448...148140484)

---
updated-dependencies:
- dependency-name: golangci/golangci-lint-action
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>

* Migrate to golangci-lint version 2

* Fix newly-detected golangci-lint issues

* Fix remaining lint issues from new golangci-lint

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Evan Anderson <evan@stacklok.com>

53 of 164 new or added lines in 78 files covered. (32.32%)

2 existing lines in 1 file now uncovered.

18301 of 32244 relevant lines covered (56.76%)

36.9 hits per line

Source File

/internal/engine/ingester/diff/diff.go: 48.36% of relevant lines covered
// SPDX-FileCopyrightText: Copyright 2023 The Minder Authors
// SPDX-License-Identifier: Apache-2.0

// Package diff provides the diff rule data ingest engine
package diff

import (
	"bufio"
	"cmp"
	"context"
	"fmt"
	"math"
	"path/filepath"
	"regexp"
	"slices"
	"strconv"
	"strings"

	"github.com/go-git/go-billy/v5"
	"github.com/go-git/go-billy/v5/helper/iofs"
	scalibr "github.com/google/osv-scalibr"
	"github.com/google/osv-scalibr/extractor"
	"github.com/google/osv-scalibr/extractor/filesystem/list"
	scalibr_fs "github.com/google/osv-scalibr/fs"
	scalibr_plugin "github.com/google/osv-scalibr/plugin"
	"github.com/google/osv-scalibr/purl"
	"github.com/rs/zerolog"
	"google.golang.org/protobuf/reflect/protoreflect"

	pbinternal "github.com/mindersec/minder/internal/proto"
	pb "github.com/mindersec/minder/pkg/api/protobuf/go/minder/v1"
	"github.com/mindersec/minder/pkg/engine/v1/interfaces"
	"github.com/mindersec/minder/pkg/entities/v1/checkpoints"
	provifv1 "github.com/mindersec/minder/pkg/providers/v1"
)

const (
	// DiffRuleDataIngestType is the type of the diff rule data ingest engine
	DiffRuleDataIngestType = "diff"
	prFilesPerPage         = 30
	wildcard               = "*"
)

// Diff is the diff rule data ingest engine
type Diff struct {
	cli provifv1.GitHub
	cfg *pb.DiffType
}

// NewDiffIngester creates a new diff ingester
func NewDiffIngester(
	cfg *pb.DiffType,
	cli provifv1.GitHub,
) (*Diff, error) {
	if cfg == nil {
		cfg = &pb.DiffType{}
	}

	if cli == nil {
		return nil, fmt.Errorf("provider is nil")
	}

	return &Diff{
		cfg: cfg,
		cli: cli,
	}, nil
}

// GetType returns the type of the diff rule data ingest engine
func (*Diff) GetType() string {
	return DiffRuleDataIngestType
}

// GetConfig returns the config for the diff rule data ingest engine
func (di *Diff) GetConfig() protoreflect.ProtoMessage {
	return di.cfg
}

// Ingest ingests a diff from a pull request in accordance with its type
//
//nolint:gocyclo
func (di *Diff) Ingest(
	ctx context.Context,
	ent protoreflect.ProtoMessage,
	_ map[string]any,
) (*interfaces.Result, error) {
	pr, ok := ent.(*pbinternal.PullRequest)
	if !ok {
		return nil, fmt.Errorf("entity is not a pull request")
	}

	// The GitHub Go API takes an int32, but our proto stores an int64; make sure we don't overflow
	if pr.Number > math.MaxInt {
		return nil, fmt.Errorf("pr number is too large")
	}
	prNumber := int(pr.Number)

	switch di.cfg.GetType() {
	case "", pb.DiffTypeDep:
		return di.getDepTypeDiff(ctx, prNumber, pr)

	case pb.DiffTypeNewDeps:
		// TODO: once we've tested some, convert DiffTypeDep to use this algorithm.
		return di.getScalibrTypeDiff(ctx, prNumber, pr)

	case pb.DiffTypeFull:
		return di.getFullTypeDiff(ctx, prNumber, pr)

	default:
		return nil, fmt.Errorf("unknown diff type")
	}
}

func (di *Diff) getDepTypeDiff(ctx context.Context, prNumber int, pr *pbinternal.PullRequest) (*interfaces.Result, error) {
	deps := pbinternal.PrDependencies{Pr: pr}
	page := 0

	for {
		prFiles, resp, err := di.cli.ListFiles(ctx, pr.RepoOwner, pr.RepoName, prNumber, prFilesPerPage, page)
		if err != nil {
			return nil, fmt.Errorf("error getting pull request files: %w", err)
		}

		for _, file := range prFiles {
			fileDiffs, err := di.ingestFileForDepDiff(file.GetFilename(), file.GetPatch(), file.GetRawURL(), *zerolog.Ctx(ctx))
			if err != nil {
				return nil, fmt.Errorf("error ingesting file %s: %w", file.GetFilename(), err)
			}
			deps.Deps = append(deps.Deps, fileDiffs...)
		}

		if resp.NextPage == 0 {
			break
		}

		page = resp.NextPage
	}

	return &interfaces.Result{Object: &deps, Checkpoint: checkpoints.NewCheckpointV1Now()}, nil
}

func (di *Diff) getFullTypeDiff(ctx context.Context, prNumber int, pr *pbinternal.PullRequest) (*interfaces.Result, error) {
	diff := &pbinternal.PrContents{Pr: pr}
	page := 0

	for {
		prFiles, resp, err := di.cli.ListFiles(ctx, pr.RepoOwner, pr.RepoName, prNumber, prFilesPerPage, page)
		if err != nil {
			return nil, fmt.Errorf("error getting pull request files: %w", err)
		}

		for _, file := range prFiles {
			fileDiffs, err := ingestFileForFullDiff(file.GetFilename(), file.GetPatch(), file.GetRawURL())
			if err != nil {
				return nil, fmt.Errorf("error ingesting file %s: %w", file.GetFilename(), err)
			}
			diff.Files = append(diff.Files, fileDiffs)
		}

		if resp.NextPage == 0 {
			break
		}

		page = resp.NextPage
	}

	return &interfaces.Result{Object: diff, Checkpoint: checkpoints.NewCheckpointV1Now()}, nil
}

func (di *Diff) ingestFileForDepDiff(
	filename, patchContents, patchUrl string,
	logger zerolog.Logger,
) ([]*pbinternal.PrDependencies_ContextualDependency, error) {
	parser := di.getParserForFile(filename, logger)
	if parser == nil {
		return nil, nil
	}

	depBatch, err := parser(patchContents)
	if err != nil {
		return nil, fmt.Errorf("error parsing file %s: %w", filename, err)
	}

	batchCtxDeps := make([]*pbinternal.PrDependencies_ContextualDependency, 0, len(depBatch))
	for i := range depBatch {
		dep := depBatch[i]
		batchCtxDeps = append(batchCtxDeps, &pbinternal.PrDependencies_ContextualDependency{
			Dep: dep,
			File: &pbinternal.PrDependencies_ContextualDependency_FilePatch{
				Name:     filename,
				PatchUrl: patchUrl,
			},
		})
	}

	return batchCtxDeps, nil
}

func (di *Diff) getScalibrTypeDiff(ctx context.Context, _ int, pr *pbinternal.PullRequest) (*interfaces.Result, error) {
	deps := pbinternal.PrDependencies{Pr: pr}

	// TODO: we should be able to just fetch the additional commits between base and target.
	// Our current Git abstraction isn't quite powerful enough, so we do two shallow clones.

	baseInventory, err := di.scalibrInventory(ctx, pr.BaseCloneUrl, pr.BaseRef)
	if err != nil {
		return nil, fmt.Errorf("failed to clone base from %s at %q: %w", pr.BaseCloneUrl, pr.BaseRef, err)
	}
	newInventory, err := di.scalibrInventory(ctx, pr.TargetCloneUrl, pr.TargetRef)
	if err != nil {
		return nil, fmt.Errorf("failed to clone fork from %s at %q: %w", pr.TargetCloneUrl, pr.TargetRef, err)
	}

	newDeps := setDifference(baseInventory, newInventory, inventorySorter)

	deps.Deps = make([]*pbinternal.PrDependencies_ContextualDependency, 0, len(newDeps))
	for _, inventory := range newDeps {
		for _, filename := range inventory.Locations {
			deps.Deps = append(deps.Deps, &pbinternal.PrDependencies_ContextualDependency{
				Dep: &pbinternal.Dependency{
					Ecosystem: inventoryToEcosystem(inventory),
					Name:      inventory.Name,
					Version:   inventory.Version,
				},
				File: &pbinternal.PrDependencies_ContextualDependency_FilePatch{
					Name:     filename,
					PatchUrl: "", // TODO: do we need this?
				},
			})
		}
	}

	return &interfaces.Result{Object: &deps, Checkpoint: checkpoints.NewCheckpointV1Now()}, nil
}

func inventorySorter(a *extractor.Inventory, b *extractor.Inventory) int {
	// If we compare by name and version first, we can avoid serializing Locations to strings
	res := cmp.Or(cmp.Compare(a.Name, b.Name), cmp.Compare(a.Version, b.Version))
	if res != 0 {
		return res
	}
	// TODO: Locations should probably be sorted, but scalibr is going to export a compare function.
	aLoc := fmt.Sprintf("%v", a.Locations)
	bLoc := fmt.Sprintf("%v", b.Locations)
	return cmp.Compare(aLoc, bLoc)
}

func (di *Diff) scalibrInventory(ctx context.Context, repoURL string, ref string) ([]*extractor.Inventory, error) {
	clone, err := di.cli.Clone(ctx, repoURL, ref)
	if err != nil {
		return nil, err
	}

	tree, err := clone.Worktree()
	if err != nil {
		return nil, err
	}
	return scanFs(ctx, tree.Filesystem, map[string]string{})
}

func scanFs(ctx context.Context, memFS billy.Filesystem, _ map[string]string) ([]*extractor.Inventory, error) {
	// have to down-cast here, because scalibr needs multiple io/fs types
	wrapped, ok := iofs.New(memFS).(scalibr_fs.FS)
	if !ok {
		return nil, fmt.Errorf("error converting filesystem to ReadDirFS")
	}

	desiredCaps := scalibr_plugin.Capabilities{
		OS:            scalibr_plugin.OSLinux,
		Network:       true,
		DirectFS:      false,
		RunningSystem: false,
	}

	scalibrFs := scalibr_fs.ScanRoot{FS: wrapped}
	scanConfig := scalibr.ScanConfig{
		ScanRoots: []*scalibr_fs.ScanRoot{&scalibrFs},
		// All includes Ruby, Dotnet which we're not ready to test yet, so use the more limited Default set.
		FilesystemExtractors: list.FilterByCapabilities(list.Default, &desiredCaps),
		Capabilities:         &desiredCaps,
	}

	scanner := scalibr.New()
	scanResults := scanner.Scan(ctx, &scanConfig)

	if scanResults == nil || scanResults.Status == nil {
		return nil, fmt.Errorf("error scanning files: no results")
	}
	if scanResults.Status.Status != scalibr_plugin.ScanStatusSucceeded {
		return nil, fmt.Errorf("error scanning files: %s", scanResults.Status)
	}

	return scanResults.Inventories, nil
}

func inventoryToEcosystem(inventory *extractor.Inventory) pbinternal.DepEcosystem {
	if inventory == nil {
		zerolog.Ctx(context.Background()).Warn().Msg("nil ecosystem scanning diffs")
		return pbinternal.DepEcosystem_DEP_ECOSYSTEM_UNSPECIFIED
	}

	// This should be inventory.PURL()... but there isn't a convenience wrapper yet
	package_url := inventory.Extractor.ToPURL(inventory)

	// Sometimes Scalibr uses the string "PyPI" instead of "pypi" when reporting the ecosystem.
	switch package_url.Type {
	// N.B. using an enum here abitrarily restricts our ability to add new
	// ecosystems without a core minder change.  Switching to strings ala
	// purl might be an improvement.
	case purl.TypePyPi:
		return pbinternal.DepEcosystem_DEP_ECOSYSTEM_PYPI
	case purl.TypeNPM:
		return pbinternal.DepEcosystem_DEP_ECOSYSTEM_NPM
	case purl.TypeGolang:
		return pbinternal.DepEcosystem_DEP_ECOSYSTEM_GO
	default:
		return pbinternal.DepEcosystem_DEP_ECOSYSTEM_UNSPECIFIED
	}
}

// ingestFileForFullDiff processes a given file's patch from a pull request.
// It scans through the patch line by line, identifying the changes made.
// If it's a hunk header, it extracts the starting line number. If it's an addition, it records the line content and its number.
// The function also increments the line number for context lines (lines that provide context but haven't been modified).
func ingestFileForFullDiff(filename, patch, patchUrl string) (*pbinternal.PrContents_File, error) {
	var result []*pbinternal.PrContents_File_Line

	scanner := bufio.NewScanner(strings.NewReader(patch))
	regex := regexp.MustCompile(`@@ -\d+,\d+ \+(\d+),\d+ @@`)

	var currentLineNumber int64
	var err error
	for scanner.Scan() {
		line := scanner.Text()

		if matches := regex.FindStringSubmatch(line); matches != nil {
			currentLineNumber, err = strconv.ParseInt(matches[1], 10, 32)
			if err != nil {
				return nil, fmt.Errorf("error parsing line number from the hunk header: %w", err)
			}
		} else if strings.HasPrefix(line, "+") {
			result = append(result, &pbinternal.PrContents_File_Line{
				Content: line[1:],
				// see the use of strconv.ParseInt above: this is a safe downcast
				// nolint: gosec
				LineNumber: int32(currentLineNumber),
			})

			currentLineNumber++
		} else if !strings.HasPrefix(line, "-") {
			currentLineNumber++
		}
	}

	if err := scanner.Err(); err != nil {
		return nil, fmt.Errorf("error reading patch: %w", err)
	}

	return &pbinternal.PrContents_File{
		Name:         filename,
		FilePatchUrl: patchUrl,
		PatchLines:   result,
	}, nil
}

func (di *Diff) getEcosystemForFile(filename string) DependencyEcosystem {
	lastComponent := filepath.Base(filename)

	for _, ecoMapping := range di.cfg.Ecosystems {
		if match, _ := filepath.Match(ecoMapping.Depfile, lastComponent); match {
			return DependencyEcosystem(ecoMapping.Name)
		}
	}
	return DepEcosystemNone
}

func (di *Diff) getParserForFile(filename string, logger zerolog.Logger) ecosystemParser {
	eco := di.getEcosystemForFile(filename)
	if eco == DepEcosystemNone {
		logger.Debug().
			Str("filename", filename).
			Msg("No ecosystem found, skipping")
		return nil
	}

	logger.Debug().
		Str("filename", filename).
		Str("package-ecosystem", string(eco)).
		Msg("matched ecosystem")

	return newEcosystemParser(eco)
}

// Computes the set of elements in updated which are not in base.
// Note: this function may permute (sort) the order of elements in base and updated.
func setDifference[Slice ~[]E, E any](base Slice, updated Slice, sorter func(a, b E) int) Slice {

	slices.SortFunc(base, sorter)
	slices.SortFunc(updated, sorter)

	baseIdx, newIdx := 0, 0
	ret := make(Slice, 0)
	for baseIdx < len(base) && newIdx < len(updated) {
		cmpResult := sorter(base[baseIdx], updated[newIdx])
		if cmpResult < 0 {
			baseIdx++
		} else if cmpResult > 0 {
			ret = append(ret, updated[newIdx])
			newIdx++
		} else {
			baseIdx++
			newIdx++
		}
	}
	if newIdx < len(updated) {
		ret = append(ret, updated[newIdx:]...)
	}

	// TODO: add metric for number of deps scanned vs total deps

	return ret
}
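
The generic setDifference helper at the bottom of the file drives the DiffTypeNewDeps path: both inventories are sorted with the supplied comparator, and only the entries unique to the updated side are returned. A minimal sketch of that behavior, written as a hypothetical test in package diff (the function is unexported, so it is only reachable from the same package; the test name and values are illustrative, not part of the repository):

package diff

import (
	"cmp"
	"slices"
	"testing"
)

// TestSetDifferenceSketch is a hypothetical in-package test illustrating the
// semantics of setDifference: it returns the elements of updated that are
// absent from base, and it sorts both inputs in place as a side effect.
func TestSetDifferenceSketch(t *testing.T) {
	base := []int{3, 1, 2}
	updated := []int{2, 4, 3, 5}

	added := setDifference(base, updated, cmp.Compare[int])

	// Only 4 and 5 are new relative to base.
	if !slices.Equal(added, []int{4, 5}) {
		t.Fatalf("expected [4 5], got %v", added)
	}
	// Both inputs were reordered by the sort.
	if !slices.IsSorted(base) || !slices.IsSorted(updated) {
		t.Fatalf("expected inputs to be sorted in place")
	}
}

Because the helper permutes its arguments, callers such as getScalibrTypeDiff cannot rely on the original ordering of the base and target inventories after the call.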
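
The hunk parsing in ingestFileForFullDiff can be illustrated the same way with a small unified-diff fragment. The sketch below is a hypothetical in-package test (the file name, patch contents, and URL are made up): the +10 in the hunk header seeds the line counter, context lines advance it, added lines are recorded with their new-file line numbers, and removed lines are skipped.

package diff

import "testing"

// TestIngestFileForFullDiffSketch is a hypothetical in-package test showing how
// ingestFileForFullDiff assigns new-file line numbers to added lines.
func TestIngestFileForFullDiffSketch(t *testing.T) {
	patch := "@@ -1,3 +10,3 @@\n unchanged\n+first added\n-removed\n+second added"

	file, err := ingestFileForFullDiff("main.go", patch, "https://example.invalid/patch")
	if err != nil {
		t.Fatal(err)
	}

	// The context line occupies new-file line 10, so the additions land on 11 and 12.
	if len(file.PatchLines) != 2 ||
		file.PatchLines[0].LineNumber != 11 ||
		file.PatchLines[1].LineNumber != 12 {
		t.Fatalf("unexpected patch lines: %v", file.PatchLines)
	}
}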