• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

mindersec / minder / 12360611398

16 Dec 2024 08:20PM UTC coverage: 55.476% (+0.1%) from 55.374%
12360611398

Pull #5181

github

web-flow
Merge c6abe06ac into 5e3b3c802
Pull Request #5181: Add support for base and target trees in git ingest, add .tar.gz bundler

302 of 416 new or added lines in 10 files covered. (72.6%)

9 existing lines in 3 files now uncovered.

16963 of 30577 relevant lines covered (55.48%)

38.17 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

62.56
/internal/engine/ingester/deps/deps.go
1
// SPDX-FileCopyrightText: Copyright 2024 The Minder Authors
2
// SPDX-License-Identifier: Apache-2.0
3

4
// Package deps provides the deps rule data ingest engine
5
package deps
6

7
import (
8
        "cmp"
9
        "context"
10
        "errors"
11
        "fmt"
12
        "slices"
13

14
        "github.com/go-git/go-billy/v5/helper/iofs"
15
        "github.com/go-git/go-git/v5/plumbing"
16
        "github.com/go-viper/mapstructure/v2"
17
        "github.com/protobom/protobom/pkg/sbom"
18
        "github.com/rs/zerolog"
19
        "google.golang.org/protobuf/reflect/protoreflect"
20

21
        mdeps "github.com/mindersec/minder/internal/deps"
22
        "github.com/mindersec/minder/internal/deps/scalibr"
23
        engerrors "github.com/mindersec/minder/internal/engine/errors"
24
        pbinternal "github.com/mindersec/minder/internal/proto"
25
        pb "github.com/mindersec/minder/pkg/api/protobuf/go/minder/v1"
26
        "github.com/mindersec/minder/pkg/engine/v1/interfaces"
27
        "github.com/mindersec/minder/pkg/entities/v1/checkpoints"
28
        provifv1 "github.com/mindersec/minder/pkg/providers/v1"
29
)
30

31
const (
32
        // DepsRuleDataIngestType is the type of the deps rule data ingest engine
33
        DepsRuleDataIngestType = "deps"
34
        // defaultBranch is the branch name used as a last resort when neither the
        // params, the rule-type configuration nor the repository provide one
        defaultBranch          = "main"
35
)
36

37
// Deps is the engine for a rule type that uses deps data ingest
38
type Deps struct {
39
        // cfg is the rule-type configuration; it is returned by GetConfig and
        // consulted when choosing the branch to scan
        cfg       *pb.DepsType
40
        // gitprov is the git provider used to clone repositories
        gitprov   provifv1.Git
41
        // extractor scans the cloned filesystem for dependencies
        extractor mdeps.Extractor
42
}
43

44
// RepoConfig is the set of parameters to the deps rule data ingest engine for repositories
45
type RepoConfig struct {
46
        // Branch is the branch to scan. When empty, the branch is taken from the
        // rule-type configuration, then the repository's default branch, then "main"
        Branch string `json:"branch" yaml:"branch" mapstructure:"branch"`
47
}
48

49
// PullRequestConfig is the set of parameters to the deps rule data ingest engine for pull requests
50
type PullRequestConfig struct {
51
        // Filter selects which target dependencies are reported. It must be one of
        // the keys of ingestTypes: "new", "new_and_updated" or "all"
        Filter string `json:"filter" yaml:"filter" mapstructure:"filter"`
52
}
53

54
// NewDepsIngester creates a new deps rule data ingest engine
55
func NewDepsIngester(cfg *pb.DepsType, gitprov provifv1.Git) (*Deps, error) {
6✔
56
        if gitprov == nil {
6✔
57
                return nil, fmt.Errorf("provider is nil")
×
58
        }
×
59

60
        if cfg == nil {
8✔
61
                cfg = &pb.DepsType{}
2✔
62
        }
2✔
63

64
        return &Deps{
6✔
65
                cfg:       cfg,
6✔
66
                gitprov:   gitprov,
6✔
67
                extractor: scalibr.NewExtractor(),
6✔
68
        }, nil
6✔
69
}
70

71
// GetType returns the type of the deps rule data ingest engine
72
func (*Deps) GetType() string {
×
73
        return DepsRuleDataIngestType
×
74
}
×
75

76
// GetConfig returns the config for the deps rule data ingest engine
77
func (gi *Deps) GetConfig() protoreflect.ProtoMessage {
×
78
        return gi.cfg
×
79
}
×
80

81
// Ingest does the actual data ingestion for a rule type by cloning a git repo,
82
// and scanning it for dependencies with a dependency extractor
83
func (gi *Deps) Ingest(ctx context.Context, ent protoreflect.ProtoMessage, params map[string]any) (*interfaces.Result, error) {
2✔
84
        switch entity := ent.(type) {
2✔
85
        case *pb.Repository:
2✔
86
                return gi.ingestRepository(ctx, entity, params)
2✔
87
        case *pbinternal.PullRequest:
×
88
                return gi.ingestPullRequest(ctx, entity, params)
×
89
        default:
×
NEW
90
                return nil, fmt.Errorf("deps is only supported for repositories and pull requests")
×
91
        }
92
}
93

94
func (gi *Deps) ingestRepository(ctx context.Context, repo *pb.Repository, params map[string]any) (*interfaces.Result, error) {
2✔
95
        var logger = zerolog.Ctx(ctx)
2✔
96
        // the branch is left unset since we want to auto-discover it
2✔
97
        // in case it's not explicitly set
2✔
98
        userCfg := &RepoConfig{}
2✔
99
        if err := mapstructure.Decode(params, userCfg); err != nil {
2✔
100
                return nil, fmt.Errorf("failed to read dependency ingester configuration from params: %w", err)
×
101
        }
×
102

103
        if repo.GetCloneUrl() == "" {
2✔
104
                return nil, fmt.Errorf("could not get clone url")
×
105
        }
×
106

107
        branch := gi.getBranch(repo, userCfg.Branch)
2✔
108
        logger.Info().Interface("repo", repo).Msgf("extracting dependencies from %s#%s", repo.GetCloneUrl(), branch)
2✔
109

2✔
110
        deps, head, err := gi.scanFromUrl(ctx, repo.GetCloneUrl(), branch)
2✔
111
        if err != nil {
2✔
112
                return nil, fmt.Errorf("could not scan filesystem: %w", err)
×
113
        }
×
114

115
        logger.Debug().Interface("deps", deps).Msgf("Scanning successful: %d nodes found", len(deps.Nodes))
2✔
116

2✔
117
        hsh := head.Hash()
2✔
118

2✔
119
        chkpoint := checkpoints.NewCheckpointV1Now().
2✔
120
                WithBranch(branch).
2✔
121
                WithCommitHash(hsh.String())
2✔
122

2✔
123
        return &interfaces.Result{
2✔
124
                Object: map[string]any{
2✔
125
                        "node_list": deps,
2✔
126
                },
2✔
127
                Checkpoint: chkpoint,
2✔
128
        }, nil
2✔
129
}
130

131
func (gi *Deps) getBranch(repo *pb.Repository, userConfigBranch string) string {
6✔
132
        // If the user has specified a branch, use that
6✔
133
        if userConfigBranch != "" {
7✔
134
                return userConfigBranch
1✔
135
        }
1✔
136

137
        // If the branch is provided in the rule-type
138
        // configuration, use that
139
        if gi.cfg.GetRepo().GetBranch() != "" {
6✔
140
                return gi.cfg.GetRepo().GetBranch()
1✔
141
        }
1✔
142
        if repo.GetDefaultBranch() != "" {
5✔
143
                return repo.GetDefaultBranch()
1✔
144
        }
1✔
145

146
        // If the branch is not provided in the rule-type
147
        // configuration, use the default branch
148
        return defaultBranch
3✔
149
}
150

151
// ingestTypes returns a sorter function for the given filter type.
152
// items which compare equal are skipped in output.
153
var ingestTypes = map[string]func(*sbom.Node, *sbom.Node) int{
154
        "new": func(base *sbom.Node, updated *sbom.Node) int {
7✔
155
                return cmp.Compare(base.GetName(), updated.GetName())
7✔
156
        },
7✔
157
        "new_and_updated": func(base *sbom.Node, updated *sbom.Node) int {
6✔
158
                return nodeSorter(base, updated)
6✔
159
        },
6✔
160
        "all": func(_ *sbom.Node, _ *sbom.Node) int {
2✔
161
                return -1
2✔
162
        },
2✔
163
}
164

165
func nodeSorter(a *sbom.Node, b *sbom.Node) int {
644✔
166
        // If we compare by name and version first, we can avoid computing map keys.
644✔
167
        res := cmp.Or(cmp.Compare(a.GetName(), b.GetName()),
644✔
168
                cmp.Compare(a.GetVersion(), b.GetVersion()))
644✔
169
        if res != 0 {
1,286✔
170
                return res
642✔
171
        }
642✔
172
        // Same name and version, compare hashes.  Go's shuffling map keys does not help here.
173
        aHashes := make([]int32, 0, len(a.GetHashes()))
2✔
174
        for algo := range a.GetHashes() {
4✔
175
                aHashes = append(aHashes, algo)
2✔
176
        }
2✔
177
        slices.Sort(aHashes)
2✔
178
        bHashes := make([]int32, 0, len(b.GetHashes()))
2✔
179
        for algo := range b.GetHashes() {
4✔
180
                bHashes = append(bHashes, algo)
2✔
181
        }
2✔
182
        slices.Sort(bHashes)
2✔
183
        for i, algo := range aHashes {
4✔
184
                if i >= len(bHashes) {
2✔
185
                        return 1
×
186
                }
×
187
                if r := cmp.Compare(algo, bHashes[i]); r != 0 {
2✔
188
                        return r
×
189
                }
×
190
                if r := cmp.Compare(a.GetHashes()[algo], b.GetHashes()[algo]); r != 0 {
3✔
191
                        return r
1✔
192
                }
1✔
193
        }
194
        if len(aHashes) < len(bHashes) {
1✔
195
                return -1
×
196
        }
×
197
        return 0
1✔
198
}
199

200
func filterNodes(base []*sbom.Node, updated []*sbom.Node, compare func(*sbom.Node, *sbom.Node) int) []*sbom.Node {
7✔
201
        slices.SortFunc(base, nodeSorter)
7✔
202
        slices.SortFunc(updated, nodeSorter)
7✔
203

7✔
204
        ret := make([]*sbom.Node, 0, len(updated))
7✔
205

7✔
206
        baseIdx, newIdx := 0, 0
7✔
207
        for baseIdx < len(base) && newIdx < len(updated) {
22✔
208
                cmpResult := compare(base[baseIdx], updated[newIdx])
15✔
209
                if cmpResult < 0 {
22✔
210
                        baseIdx++
7✔
211
                } else if cmpResult > 0 {
17✔
212
                        ret = append(ret, updated[newIdx])
2✔
213
                        newIdx++
2✔
214
                } else {
8✔
215
                        newIdx++
6✔
216
                }
6✔
217
        }
218
        if newIdx < len(updated) {
9✔
219
                ret = append(ret, updated[newIdx:]...)
2✔
220
        }
2✔
221
        return ret
7✔
222
}
223

224
func (gi *Deps) ingestPullRequest(
225
        ctx context.Context, pr *pbinternal.PullRequest, params map[string]any) (*interfaces.Result, error) {
×
226
        userCfg := &PullRequestConfig{}
×
227
        if err := mapstructure.Decode(params, userCfg); err != nil {
×
228
                return nil, fmt.Errorf("failed to read dependency ingester configuration from params: %w", err)
×
229
        }
×
230
        if _, ok := ingestTypes[userCfg.Filter]; !ok {
×
231
                return nil, fmt.Errorf("invalid filter type: %s", userCfg.Filter)
×
232
        }
×
233

234
        if pr.GetBaseCloneUrl() == "" {
×
235
                return nil, errors.New("could not get base clone url")
×
236
        }
×
237
        if pr.GetTargetCloneUrl() == "" {
×
238
                return nil, errors.New("could not get head clone url")
×
239
        }
×
240
        baseDeps, _, err := gi.scanFromUrl(ctx, pr.GetBaseCloneUrl(), pr.GetBaseRef())
×
241
        if err != nil {
×
242
                return nil, fmt.Errorf("could not scan base filesystem: %w", err)
×
243
        }
×
244
        targetDeps, ref, err := gi.scanFromUrl(ctx, pr.GetTargetCloneUrl(), pr.GetTargetRef())
×
245
        if err != nil {
×
246
                return nil, fmt.Errorf("could not scan target filesystem: %w", err)
×
247
        }
×
248

249
        // Overwrite the target list of nodes with the result of filtering by desired match.
250
        // We checked that the filter is valid at the top of the function.
251
        targetDeps.Nodes = filterNodes(baseDeps.GetNodes(), targetDeps.GetNodes(), ingestTypes[userCfg.Filter])
×
252

×
253
        chkpoint := checkpoints.NewCheckpointV1Now().
×
254
                WithBranch(pr.GetTargetRef()).
×
255
                WithCommitHash(ref.Hash().String())
×
256

×
257
        return &interfaces.Result{
×
258
                Object: map[string]any{
×
259
                        "node_list": targetDeps,
×
260
                },
×
261
                Checkpoint: chkpoint,
×
262
        }, nil
×
263
}
264

265
// TODO: this first part is fairly shared with fetchClone from ../git/git.go.
266
func (gi *Deps) scanFromUrl(ctx context.Context, url string, branch string) (*sbom.NodeList, *plumbing.Reference, error) {
2✔
267
        // We clone to the memfs go-billy filesystem driver, which doesn't
2✔
268
        // allow for direct access to the underlying filesystem. This is
2✔
269
        // because we want to be able to run this in a sandboxed environment
2✔
270
        // where we don't have access to the underlying filesystem.
2✔
271
        repo, err := gi.gitprov.Clone(ctx, url, branch)
2✔
272
        if err != nil {
2✔
273
                if errors.Is(err, provifv1.ErrProviderGitBranchNotFound) {
×
274
                        return nil, nil, fmt.Errorf("%w: %s: branch %s", engerrors.ErrEvaluationFailed,
×
275
                                provifv1.ErrProviderGitBranchNotFound, branch)
×
276
                } else if errors.Is(err, provifv1.ErrRepositoryEmpty) {
×
277
                        return nil, nil, fmt.Errorf("%w: %s", engerrors.ErrEvaluationSkipped, provifv1.ErrRepositoryEmpty)
×
278
                }
×
279
                return nil, nil, err
×
280
        }
281

282
        wt, err := repo.Worktree()
2✔
283
        if err != nil {
2✔
284
                return nil, nil, fmt.Errorf("could not get worktree: %w", err)
×
285
        }
×
286

287
        if wt.Filesystem == nil {
2✔
288
                return nil, nil, fmt.Errorf("could not get filesystem")
×
289
        }
×
290

291
        deps, err := gi.extractor.ScanFilesystem(ctx, iofs.New(wt.Filesystem))
2✔
292
        if err != nil {
2✔
293
                return nil, nil, fmt.Errorf("%T extractor: %w", gi.extractor, err)
×
294
        }
×
295

296
        ref, err := repo.Head()
2✔
297
        if err != nil {
2✔
298
                return nil, nil, fmt.Errorf("could not get head: %w", err)
×
299
        }
×
300

301
        return deps, ref, nil
2✔
302
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc