• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

umputun / ralphex / 21766436180

06 Feb 2026 09:24PM UTC coverage: 80.125% (+0.07%) from 80.057%
21766436180

Pull #70

github

umputun
fix: exit review loop when no changes detected

add HEAD hash comparison fallback in runClaudeReviewLoop — if claude
makes no commits during an iteration, the loop exits early instead of
running to max iterations. also update embedded review prompts to use
run_in_background + TaskOutput pattern for reliable agent completion.

Related to #69
Pull Request #70: fix: exit review loop when no changes detected

37 of 48 new or added lines in 4 files covered. (77.08%)

2 existing lines in 1 file now uncovered.

4491 of 5605 relevant lines covered (80.12%)

157.41 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

87.05
/pkg/processor/runner.go
1
// Package processor provides the main orchestration loop for ralphex execution.
2
package processor
3

4
import (
5
        "context"
6
        "errors"
7
        "fmt"
8
        "os"
9
        "os/exec"
10
        "strings"
11
        "time"
12

13
        "github.com/umputun/ralphex/pkg/config"
14
        "github.com/umputun/ralphex/pkg/executor"
15
        "github.com/umputun/ralphex/pkg/status"
16
)
17

18
// DefaultIterationDelay is the pause between iterations to allow system to settle.
// It is used by NewWithExecutors whenever Config.IterationDelayMs is not positive.
const DefaultIterationDelay = 2 * time.Second

// iteration limits and output caps for the individual phases.
// the claude review loop computes its limit as max(minimum, MaxIterations/divisor).
const (
	minReviewIterations    = 3    // minimum claude review iterations
	reviewIterationDivisor = 10   // review iterations = max_iterations / divisor
	minCodexIterations     = 3    // minimum codex review iterations
	codexIterationDivisor  = 5    // codex iterations = max_iterations / divisor
	minPlanIterations      = 5    // minimum plan creation iterations
	planIterationDivisor   = 5    // plan iterations = max_iterations / divisor
	maxCodexSummaryLen     = 5000 // max chars for codex output summary
)
30

31
// Mode represents the execution mode.
// Runner.Run dispatches on this value and returns an error for any mode not listed below.
type Mode string

// supported execution modes.
const (
	ModeFull      Mode = "full"       // full execution: tasks + reviews + codex
	ModeReview    Mode = "review"     // skip tasks, run full review pipeline
	ModeCodexOnly Mode = "codex-only" // skip tasks and first review, run only codex loop
	ModeTasksOnly Mode = "tasks-only" // run only task phase, skip all reviews
	ModePlan      Mode = "plan"       // interactive plan creation mode
)
41

42
// Config holds runner configuration.
// It is passed by value to New / NewWithExecutors and stored inside the Runner.
type Config struct {
	PlanFile         string         // path to plan file (required for full mode)
	PlanDescription  string         // plan description for interactive plan creation mode
	ProgressPath     string         // path to progress file
	Mode             Mode           // execution mode
	MaxIterations    int            // maximum iterations for task phase
	Debug            bool           // enable debug output
	NoColor          bool           // disable color output
	IterationDelayMs int            // delay between iterations in milliseconds; values <= 0 select DefaultIterationDelay
	TaskRetryCount   int            // number of times to retry failed tasks; 0 is honored only when AppConfig.TaskRetryCountSet is true
	CodexEnabled     bool           // whether codex review is enabled
	FinalizeEnabled  bool           // whether finalize step is enabled
	DefaultBranch    string         // default branch name (detected from repo)
	AppConfig        *config.Config // full application config (for executors and prompts)
}
58

59
//go:generate moq -out mocks/executor.go -pkg mocks -skip-ensure -fmt goimports . Executor
60
//go:generate moq -out mocks/logger.go -pkg mocks -skip-ensure -fmt goimports . Logger
61
//go:generate moq -out mocks/input_collector.go -pkg mocks -skip-ensure -fmt goimports . InputCollector
62
//go:generate moq -out mocks/git_checker.go -pkg mocks -skip-ensure -fmt goimports . GitChecker
63

64
// Executor runs CLI commands and returns results.
// The runner reads the Error, Signal and Output fields of the returned executor.Result.
type Executor interface {
	Run(ctx context.Context, prompt string) executor.Result
}
68

69
// Logger provides logging functionality.
// The runner switches phases with SetPhase, announces iterations with PrintSection
// and forwards executor output through PrintAligned.
type Logger interface {
	SetPhase(phase status.Phase)
	Print(format string, args ...any)
	PrintRaw(format string, args ...any)
	PrintSection(section status.Section)
	PrintAligned(text string)
	LogQuestion(question string, options []string)
	LogAnswer(answer string)
	LogDraftReview(action string, feedback string)
	Path() string
}
81

82
// InputCollector provides interactive input collection for plan creation.
// It is optional and attached to a Runner via SetInputCollector.
type InputCollector interface {
	AskQuestion(ctx context.Context, question string, options []string) (string, error)
	AskDraftReview(ctx context.Context, question string, planContent string) (action string, feedback string, err error)
}
87

88
// GitChecker provides git state inspection for the review loop.
// The review loop compares HeadHash before and after a claude run to detect iterations without commits.
type GitChecker interface {
	HeadHash() (string, error)
}
92

93
// Runner orchestrates the execution loop.
// Create it with New or NewWithExecutors; the zero value has no executors or logger set.
type Runner struct {
	cfg            Config                   // configuration captured at construction time
	log            Logger                   // destination for all progress output
	claude         Executor                 // runs task, review and evaluation prompts
	codex          Executor                 // runs the codex external review
	custom         *executor.CustomExecutor // custom external review script, nil when not configured
	git            GitChecker               // optional; nil disables no-commit detection
	inputCollector InputCollector           // optional; set via SetInputCollector
	iterationDelay time.Duration            // pause between loop iterations
	taskRetryCount int                      // retries allowed after a FAILED task signal
}
105

106
// New creates a new Runner with the given configuration.
107
// If codex is enabled but the binary is not found in PATH, it is automatically disabled with a warning.
108
func New(cfg Config, log Logger) *Runner {
3✔
109
        // build claude executor with config values
3✔
110
        claudeExec := &executor.ClaudeExecutor{
3✔
111
                OutputHandler: func(text string) {
3✔
112
                        log.PrintAligned(text)
×
113
                },
×
114
                Debug: cfg.Debug,
115
        }
116
        if cfg.AppConfig != nil {
6✔
117
                claudeExec.Command = cfg.AppConfig.ClaudeCommand
3✔
118
                claudeExec.Args = cfg.AppConfig.ClaudeArgs
3✔
119
                claudeExec.ErrorPatterns = cfg.AppConfig.ClaudeErrorPatterns
3✔
120
        }
3✔
121

122
        // build codex executor with config values
123
        codexExec := &executor.CodexExecutor{
3✔
124
                OutputHandler: func(text string) {
3✔
125
                        log.PrintAligned(text)
×
126
                },
×
127
                Debug: cfg.Debug,
128
        }
129
        if cfg.AppConfig != nil {
6✔
130
                codexExec.Command = cfg.AppConfig.CodexCommand
3✔
131
                codexExec.Model = cfg.AppConfig.CodexModel
3✔
132
                codexExec.ReasoningEffort = cfg.AppConfig.CodexReasoningEffort
3✔
133
                codexExec.TimeoutMs = cfg.AppConfig.CodexTimeoutMs
3✔
134
                codexExec.Sandbox = cfg.AppConfig.CodexSandbox
3✔
135
                codexExec.ErrorPatterns = cfg.AppConfig.CodexErrorPatterns
3✔
136
        }
3✔
137

138
        // build custom executor if custom review script is configured
139
        var customExec *executor.CustomExecutor
3✔
140
        if cfg.AppConfig != nil && cfg.AppConfig.CustomReviewScript != "" {
4✔
141
                customExec = &executor.CustomExecutor{
1✔
142
                        Script: cfg.AppConfig.CustomReviewScript,
1✔
143
                        OutputHandler: func(text string) {
1✔
144
                                log.PrintAligned(text)
×
145
                        },
×
146
                        ErrorPatterns: cfg.AppConfig.CodexErrorPatterns, // reuse codex error patterns
147
                }
148
        }
149

150
        // auto-disable codex if the binary is not installed AND we need codex
151
        // (skip this check if using custom external review tool or external review is disabled)
152
        if cfg.CodexEnabled && needsCodexBinary(cfg.AppConfig) {
4✔
153
                codexCmd := codexExec.Command
1✔
154
                if codexCmd == "" {
1✔
155
                        codexCmd = "codex"
×
156
                }
×
157
                if _, err := exec.LookPath(codexCmd); err != nil {
2✔
158
                        log.Print("warning: codex not found (%s: %v), disabling codex review phase", codexCmd, err)
1✔
159
                        cfg.CodexEnabled = false
1✔
160
                }
1✔
161
        }
162

163
        return NewWithExecutors(cfg, log, claudeExec, codexExec, customExec, nil)
3✔
164
}
165

166
// NewWithExecutors creates a new Runner with custom executors (for testing).
167
func NewWithExecutors(cfg Config, log Logger, claude, codex Executor, custom *executor.CustomExecutor, git GitChecker) *Runner {
67✔
168
        // determine iteration delay from config or default
67✔
169
        iterDelay := DefaultIterationDelay
67✔
170
        if cfg.IterationDelayMs > 0 {
83✔
171
                iterDelay = time.Duration(cfg.IterationDelayMs) * time.Millisecond
16✔
172
        }
16✔
173

174
        // determine task retry count from config
175
        // appConfig.TaskRetryCountSet means user explicitly set it (even to 0 for no retries)
176
        retryCount := 1
67✔
177
        if cfg.AppConfig != nil && cfg.AppConfig.TaskRetryCountSet {
122✔
178
                retryCount = cfg.TaskRetryCount
55✔
179
        } else if cfg.TaskRetryCount > 0 {
68✔
180
                retryCount = cfg.TaskRetryCount
1✔
181
        }
1✔
182

183
        return &Runner{
67✔
184
                cfg:            cfg,
67✔
185
                log:            log,
67✔
186
                claude:         claude,
67✔
187
                codex:          codex,
67✔
188
                custom:         custom,
67✔
189
                git:            git,
67✔
190
                iterationDelay: iterDelay,
67✔
191
                taskRetryCount: retryCount,
67✔
192
        }
67✔
193
}
194

195
// SetInputCollector sets the input collector for plan creation mode.
196
func (r *Runner) SetInputCollector(c InputCollector) {
15✔
197
        r.inputCollector = c
15✔
198
}
15✔
199

200
// SetGitChecker sets the git checker for no-commit detection in review loops.
NEW
201
func (r *Runner) SetGitChecker(g GitChecker) {
×
NEW
202
        r.git = g
×
NEW
203
}
×
204

205
// Run executes the main loop based on configured mode.
206
func (r *Runner) Run(ctx context.Context) error {
55✔
207
        switch r.cfg.Mode {
55✔
208
        case ModeFull:
13✔
209
                return r.runFull(ctx)
13✔
210
        case ModeReview:
11✔
211
                return r.runReviewOnly(ctx)
11✔
212
        case ModeCodexOnly:
10✔
213
                return r.runCodexOnly(ctx)
10✔
214
        case ModeTasksOnly:
4✔
215
                return r.runTasksOnly(ctx)
4✔
216
        case ModePlan:
16✔
217
                return r.runPlanCreation(ctx)
16✔
218
        default:
1✔
219
                return fmt.Errorf("unknown mode: %s", r.cfg.Mode)
1✔
220
        }
221
}
222

223
// runFull executes the complete pipeline: tasks → review → codex → review.
224
func (r *Runner) runFull(ctx context.Context) error {
13✔
225
        if r.cfg.PlanFile == "" {
14✔
226
                return errors.New("plan file required for full mode")
1✔
227
        }
1✔
228

229
        // phase 1: task execution
230
        r.log.SetPhase(status.PhaseTask)
12✔
231
        r.log.PrintRaw("starting task execution phase\n")
12✔
232

12✔
233
        if err := r.runTaskPhase(ctx); err != nil {
18✔
234
                return fmt.Errorf("task phase: %w", err)
6✔
235
        }
6✔
236

237
        // phase 2: first review pass - address ALL findings
238
        r.log.SetPhase(status.PhaseReview)
6✔
239
        r.log.PrintSection(status.NewGenericSection("claude review 0: all findings"))
6✔
240

6✔
241
        if err := r.runClaudeReview(ctx, r.replacePromptVariables(r.cfg.AppConfig.ReviewFirstPrompt)); err != nil {
6✔
242
                return fmt.Errorf("first review: %w", err)
×
243
        }
×
244

245
        // phase 2.1: claude review loop (critical/major) before codex
246
        if err := r.runClaudeReviewLoop(ctx); err != nil {
6✔
247
                return fmt.Errorf("pre-codex review loop: %w", err)
×
248
        }
×
249

250
        // phase 2.5+3: codex → post-codex review → finalize
251
        if err := r.runCodexAndPostReview(ctx); err != nil {
6✔
252
                return err
×
253
        }
×
254

255
        r.log.Print("all phases completed successfully")
6✔
256
        return nil
6✔
257
}
258

259
// runReviewOnly executes only the review pipeline: review → codex → review.
260
func (r *Runner) runReviewOnly(ctx context.Context) error {
11✔
261
        // phase 1: first review
11✔
262
        r.log.SetPhase(status.PhaseReview)
11✔
263
        r.log.PrintSection(status.NewGenericSection("claude review 0: all findings"))
11✔
264

11✔
265
        if err := r.runClaudeReview(ctx, r.replacePromptVariables(r.cfg.AppConfig.ReviewFirstPrompt)); err != nil {
12✔
266
                return fmt.Errorf("first review: %w", err)
1✔
267
        }
1✔
268

269
        // phase 1.1: claude review loop (critical/major) before codex
270
        if err := r.runClaudeReviewLoop(ctx); err != nil {
11✔
271
                return fmt.Errorf("pre-codex review loop: %w", err)
1✔
272
        }
1✔
273

274
        // phase 2+3: codex → post-codex review → finalize
275
        if err := r.runCodexAndPostReview(ctx); err != nil {
12✔
276
                return err
3✔
277
        }
3✔
278

279
        r.log.Print("review phases completed successfully")
6✔
280
        return nil
6✔
281
}
282

283
// runCodexOnly executes only the codex pipeline: codex → review → finalize.
284
func (r *Runner) runCodexOnly(ctx context.Context) error {
10✔
285
        if err := r.runCodexAndPostReview(ctx); err != nil {
11✔
286
                return err
1✔
287
        }
1✔
288

289
        r.log.Print("codex phases completed successfully")
9✔
290
        return nil
9✔
291
}
292

293
// runCodexAndPostReview runs the shared codex → post-codex claude review → finalize pipeline.
294
// used by runFull, runReviewOnly, and runCodexOnly to avoid duplicating this sequence.
295
func (r *Runner) runCodexAndPostReview(ctx context.Context) error {
25✔
296
        // codex external review loop
25✔
297
        r.log.SetPhase(status.PhaseCodex)
25✔
298
        r.log.PrintSection(status.NewGenericSection("codex external review"))
25✔
299

25✔
300
        if err := r.runCodexLoop(ctx); err != nil {
28✔
301
                return fmt.Errorf("codex loop: %w", err)
3✔
302
        }
3✔
303

304
        // claude review loop (critical/major) after codex
305
        r.log.SetPhase(status.PhaseReview)
22✔
306

22✔
307
        if err := r.runClaudeReviewLoop(ctx); err != nil {
22✔
308
                return fmt.Errorf("post-codex review loop: %w", err)
×
309
        }
×
310

311
        // optional finalize step (best-effort, but propagates context cancellation)
312
        return r.runFinalize(ctx)
22✔
313
}
314

315
// runTasksOnly executes only task phase, skipping all reviews.
316
func (r *Runner) runTasksOnly(ctx context.Context) error {
4✔
317
        if r.cfg.PlanFile == "" {
5✔
318
                return errors.New("plan file required for tasks-only mode")
1✔
319
        }
1✔
320

321
        r.log.SetPhase(status.PhaseTask)
3✔
322
        r.log.PrintRaw("starting task execution phase\n")
3✔
323

3✔
324
        if err := r.runTaskPhase(ctx); err != nil {
4✔
325
                return fmt.Errorf("task phase: %w", err)
1✔
326
        }
1✔
327

328
        r.log.Print("task execution completed successfully")
2✔
329
        return nil
2✔
330
}
331

332
// runTaskPhase executes tasks until completion or max iterations.
333
// executes ONE Task section per iteration.
334
func (r *Runner) runTaskPhase(ctx context.Context) error {
15✔
335
        prompt := r.replacePromptVariables(r.cfg.AppConfig.TaskPrompt)
15✔
336
        retryCount := 0
15✔
337

15✔
338
        for i := 1; i <= r.cfg.MaxIterations; i++ {
34✔
339
                select {
19✔
340
                case <-ctx.Done():
1✔
341
                        return fmt.Errorf("task phase: %w", ctx.Err())
1✔
342
                default:
18✔
343
                }
344

345
                r.log.PrintSection(status.NewTaskIterationSection(i))
18✔
346

18✔
347
                result := r.claude.Run(ctx, prompt)
18✔
348
                if result.Error != nil {
20✔
349
                        if err := r.handlePatternMatchError(result.Error, "claude"); err != nil {
3✔
350
                                return err
1✔
351
                        }
1✔
352
                        return fmt.Errorf("claude execution: %w", result.Error)
1✔
353
                }
354

355
                if result.Signal == SignalCompleted {
24✔
356
                        // verify plan actually has no uncompleted checkboxes
8✔
357
                        if r.hasUncompletedTasks() {
8✔
358
                                r.log.Print("warning: completion signal received but plan still has [ ] items, continuing...")
×
359
                                continue
×
360
                        }
361
                        r.log.PrintRaw("\nall tasks completed, starting code review...\n")
8✔
362
                        return nil
8✔
363
                }
364

365
                if result.Signal == SignalFailed {
13✔
366
                        if retryCount < r.taskRetryCount {
7✔
367
                                r.log.Print("task failed, retrying...")
2✔
368
                                retryCount++
2✔
369
                                time.Sleep(r.iterationDelay)
2✔
370
                                continue
2✔
371
                        }
372
                        return errors.New("task execution failed after retry (FAILED signal received)")
3✔
373
                }
374

375
                retryCount = 0
3✔
376
                // continue with same prompt - it reads from plan file each time
3✔
377
                time.Sleep(r.iterationDelay)
3✔
378
        }
379

380
        return fmt.Errorf("max iterations (%d) reached without completion", r.cfg.MaxIterations)
1✔
381
}
382

383
// runClaudeReview runs Claude review with the given prompt until REVIEW_DONE.
384
func (r *Runner) runClaudeReview(ctx context.Context, prompt string) error {
17✔
385
        result := r.claude.Run(ctx, prompt)
17✔
386
        if result.Error != nil {
17✔
387
                if err := r.handlePatternMatchError(result.Error, "claude"); err != nil {
×
388
                        return err
×
389
                }
×
390
                return fmt.Errorf("claude execution: %w", result.Error)
×
391
        }
392

393
        if result.Signal == SignalFailed {
18✔
394
                return errors.New("review failed (FAILED signal received)")
1✔
395
        }
1✔
396

397
        if !IsReviewDone(result.Signal) {
16✔
398
                r.log.Print("warning: first review pass did not complete cleanly, continuing...")
×
399
        }
×
400

401
        return nil
16✔
402
}
403

404
// runClaudeReviewLoop runs claude review iterations using second review prompt.
405
func (r *Runner) runClaudeReviewLoop(ctx context.Context) error {
38✔
406
        // review iterations = 10% of max_iterations
38✔
407
        maxReviewIterations := max(minReviewIterations, r.cfg.MaxIterations/reviewIterationDivisor)
38✔
408

38✔
409
        for i := 1; i <= maxReviewIterations; i++ {
79✔
410
                select {
41✔
411
                case <-ctx.Done():
×
412
                        return fmt.Errorf("review: %w", ctx.Err())
×
413
                default:
41✔
414
                }
415

416
                r.log.PrintSection(status.NewClaudeReviewSection(i, ": critical/major"))
41✔
417

41✔
418
                // capture HEAD hash before running claude for no-commit detection
41✔
419
                headBefore := r.headHash()
41✔
420

41✔
421
                result := r.claude.Run(ctx, r.replacePromptVariables(r.cfg.AppConfig.ReviewSecondPrompt))
41✔
422
                if result.Error != nil {
42✔
423
                        if err := r.handlePatternMatchError(result.Error, "claude"); err != nil {
2✔
424
                                return err
1✔
425
                        }
1✔
426
                        return fmt.Errorf("claude execution: %w", result.Error)
×
427
                }
428

429
                if result.Signal == SignalFailed {
40✔
430
                        return errors.New("review failed (FAILED signal received)")
×
431
                }
×
432

433
                if IsReviewDone(result.Signal) {
75✔
434
                        r.log.Print("claude review complete - no more findings")
35✔
435
                        return nil
35✔
436
                }
35✔
437

438
                // fallback: if HEAD hash hasn't changed, claude found nothing to fix
439
                if headBefore != "" {
7✔
440
                        if headAfter := r.headHash(); headAfter == headBefore {
3✔
441
                                r.log.Print("claude review complete - no changes detected")
1✔
442
                                return nil
1✔
443
                        }
1✔
444
                }
445

446
                r.log.Print("issues fixed, running another review iteration...")
4✔
447
                time.Sleep(r.iterationDelay)
4✔
448
        }
449

450
        r.log.Print("max claude review iterations reached, continuing...")
1✔
451
        return nil
1✔
452
}
453

454
// headHash returns the current HEAD commit hash, or empty string if unavailable.
455
func (r *Runner) headHash() string {
43✔
456
        if r.git == nil {
79✔
457
                return ""
36✔
458
        }
36✔
459
        hash, err := r.git.HeadHash()
7✔
460
        if err != nil {
7✔
NEW
461
                return ""
×
NEW
462
        }
×
463
        return hash
7✔
464
}
465

466
// externalReviewTool returns the effective external review tool to use.
467
// handles backward compatibility: codex_enabled = false → "none"
468
// the CodexEnabled flag takes precedence for backward compatibility.
469
func (r *Runner) externalReviewTool() string {
25✔
470
        // backward compatibility: codex_enabled = false means no external review
25✔
471
        // this takes precedence over external_review_tool setting
25✔
472
        if !r.cfg.CodexEnabled {
37✔
473
                return "none"
12✔
474
        }
12✔
475

476
        // check explicit external_review_tool setting
477
        if r.cfg.AppConfig != nil && r.cfg.AppConfig.ExternalReviewTool != "" {
26✔
478
                return r.cfg.AppConfig.ExternalReviewTool
13✔
479
        }
13✔
480

481
        // default to codex
482
        return "codex"
×
483
}
484

485
// runCodexLoop runs the external review loop (codex or custom) until no findings.
486
func (r *Runner) runCodexLoop(ctx context.Context) error {
25✔
487
        tool := r.externalReviewTool()
25✔
488

25✔
489
        // skip external review phase if disabled
25✔
490
        if tool == "none" {
38✔
491
                r.log.Print("external review disabled, skipping...")
13✔
492
                return nil
13✔
493
        }
13✔
494

495
        // custom review tool
496
        if tool == "custom" {
14✔
497
                if r.custom == nil {
3✔
498
                        return errors.New("custom review script not configured")
1✔
499
                }
1✔
500
                return r.runExternalReviewLoop(ctx, externalReviewConfig{
1✔
501
                        name:            "custom",
1✔
502
                        runReview:       func(ctx context.Context, prompt string) executor.Result { return r.custom.Run(ctx, prompt) },
2✔
503
                        buildPrompt:     r.buildCustomReviewPrompt,
504
                        buildEvalPrompt: r.buildCustomEvaluationPrompt,
505
                        showSummary:     r.showCustomSummary,
506
                        makeSection:     status.NewCustomIterationSection,
507
                })
508
        }
509

510
        // default: codex review
511
        return r.runExternalReviewLoop(ctx, externalReviewConfig{
10✔
512
                name:            "codex",
10✔
513
                runReview:       r.codex.Run,
10✔
514
                buildPrompt:     r.buildCodexPrompt,
10✔
515
                buildEvalPrompt: r.buildCodexEvaluationPrompt,
10✔
516
                showSummary:     r.showCodexSummary,
10✔
517
                makeSection:     status.NewCodexIterationSection,
10✔
518
        })
10✔
519
}
520

521
// externalReviewConfig holds callbacks for running an external review tool.
// runCodexLoop fills one in for codex or the custom script, and runExternalReviewLoop
// calls every callback, so all fields must be set.
type externalReviewConfig struct {
	name            string                                                   // tool name for error messages
	runReview       func(ctx context.Context, prompt string) executor.Result // run the external review tool
	buildPrompt     func(isFirst bool, claudeResponse string) string         // build prompt for review tool
	buildEvalPrompt func(output string) string                               // build evaluation prompt for claude
	showSummary     func(output string)                                      // display review findings summary
	makeSection     func(iteration int) status.Section                       // create section header
}
530

531
// runExternalReviewLoop runs a generic external review tool-claude loop until no findings.
532
func (r *Runner) runExternalReviewLoop(ctx context.Context, cfg externalReviewConfig) error {
11✔
533
        // iterations = 20% of max_iterations (min 3)
11✔
534
        maxIterations := max(3, r.cfg.MaxIterations/5)
11✔
535

11✔
536
        var claudeResponse string // first iteration has no prior response
11✔
537

11✔
538
        for i := 1; i <= maxIterations; i++ {
22✔
539
                select {
11✔
540
                case <-ctx.Done():
×
541
                        return fmt.Errorf("%s loop: %w", cfg.name, ctx.Err())
×
542
                default:
11✔
543
                }
544

545
                r.log.PrintSection(cfg.makeSection(i))
11✔
546

11✔
547
                // run external review tool
11✔
548
                reviewResult := cfg.runReview(ctx, cfg.buildPrompt(i == 1, claudeResponse))
11✔
549
                if reviewResult.Error != nil {
13✔
550
                        if err := r.handlePatternMatchError(reviewResult.Error, cfg.name); err != nil {
3✔
551
                                return err
1✔
552
                        }
1✔
553
                        return fmt.Errorf("%s execution: %w", cfg.name, reviewResult.Error)
1✔
554
                }
555

556
                if reviewResult.Output == "" {
11✔
557
                        r.log.Print("%s review returned no output, skipping...", cfg.name)
2✔
558
                        break
2✔
559
                }
560

561
                // show findings summary before Claude evaluation
562
                cfg.showSummary(reviewResult.Output)
7✔
563

7✔
564
                // pass output to claude for evaluation and fixing
7✔
565
                r.log.SetPhase(status.PhaseClaudeEval)
7✔
566
                r.log.PrintSection(status.NewClaudeEvalSection())
7✔
567
                claudeResult := r.claude.Run(ctx, cfg.buildEvalPrompt(reviewResult.Output))
7✔
568

7✔
569
                // restore codex phase for next iteration
7✔
570
                r.log.SetPhase(status.PhaseCodex)
7✔
571
                if claudeResult.Error != nil {
7✔
572
                        if err := r.handlePatternMatchError(claudeResult.Error, "claude"); err != nil {
×
573
                                return err
×
574
                        }
×
575
                        return fmt.Errorf("claude execution: %w", claudeResult.Error)
×
576
                }
577

578
                claudeResponse = claudeResult.Output
7✔
579

7✔
580
                // exit only when claude sees "no findings"
7✔
581
                if IsCodexDone(claudeResult.Signal) {
14✔
582
                        r.log.Print("%s review complete - no more findings", cfg.name)
7✔
583
                        return nil
7✔
584
                }
7✔
585

586
                time.Sleep(r.iterationDelay)
×
587
        }
588

589
        r.log.Print("max %s iterations reached, continuing to next phase...", cfg.name)
2✔
590
        return nil
2✔
591
}
592

593
// buildCodexPrompt creates the prompt for codex review.
// isFirst selects the diff to review: the branch diff against the default branch on the
// first iteration, plain "git diff" (uncommitted changes) afterwards.
// a non-empty claudeResponse is appended as previous-review context so codex can
// re-evaluate its findings against claude's answer.
func (r *Runner) buildCodexPrompt(isFirst bool, claudeResponse string) string {
	// build plan context if available
	planContext := ""
	if r.cfg.PlanFile != "" {
		planContext = fmt.Sprintf(`
## Plan Context
The code implements the plan at: %s

---
`, r.resolvePlanFilePath())
	}

	// different diff command based on iteration
	var diffInstruction, diffDescription string
	if isFirst {
		defaultBranch := r.getDefaultBranch()
		diffInstruction = fmt.Sprintf("Run: git diff %s...HEAD", defaultBranch)
		diffDescription = fmt.Sprintf("code changes between %s and HEAD branch", defaultBranch)
	} else {
		diffInstruction = "Run: git diff"
		diffDescription = "uncommitted changes (Claude's fixes from previous iteration)"
	}

	// planContext is empty when no plan file is set, so the prompt then starts at "Review the ..."
	basePrompt := fmt.Sprintf(`%sReview the %s.

%s

Analyze for:
- Bugs and logic errors
- Security vulnerabilities
- Race conditions
- Error handling gaps
- Code quality issues

Report findings with file:line references. If no issues found, say "NO ISSUES FOUND".`, planContext, diffDescription, diffInstruction)

	// append claude's previous answer when there is one
	if claudeResponse != "" {
		return fmt.Sprintf(`%s

---
PREVIOUS REVIEW CONTEXT:
Claude (previous reviewer) responded to your findings:

%s

Re-evaluate considering Claude's arguments. If Claude's fixes are correct, acknowledge them.
If Claude's arguments are invalid, explain why the issues still exist.`, basePrompt, claudeResponse)
	}

	return basePrompt
}
645

646
// hasUncompletedTasks checks if plan file has any uncompleted checkboxes.
647
func (r *Runner) hasUncompletedTasks() bool {
13✔
648
        content, err := os.ReadFile(r.resolvePlanFilePath())
13✔
649
        if err != nil {
13✔
650
                return true // assume incomplete if can't read
×
651
        }
×
652

653
        // look for uncompleted checkbox pattern: [ ] (not [x])
654
        for line := range strings.SplitSeq(string(content), "\n") {
43✔
655
                trimmed := strings.TrimSpace(line)
30✔
656
                if strings.HasPrefix(trimmed, "- [ ]") {
33✔
657
                        return true
3✔
658
                }
3✔
659
        }
660
        return false
10✔
661
}
662

663
// showCodexSummary displays a condensed summary of codex output before Claude evaluation.
664
// extracts text until first code block or maxCodexSummaryLen chars, whichever is shorter.
665
func (r *Runner) showCodexSummary(output string) {
6✔
666
        r.showExternalReviewSummary("codex", output)
6✔
667
}
6✔
668

669
// showCustomSummary displays a condensed summary of custom review output before Claude evaluation.
670
func (r *Runner) showCustomSummary(output string) {
1✔
671
        r.showExternalReviewSummary("custom", output)
1✔
672
}
1✔
673

674
// showExternalReviewSummary displays a condensed summary of external review output.
675
// extracts text until first code block or 5000 chars, whichever is shorter.
676
func (r *Runner) showExternalReviewSummary(toolName, output string) {
7✔
677
        summary := output
7✔
678

7✔
679
        // trim to first code block if present
7✔
680
        if idx := strings.Index(summary, "```"); idx > 0 {
7✔
681
                summary = summary[:idx]
×
682
        }
×
683

684
        // limit to maxCodexSummaryLen chars
685
        if len(summary) > maxCodexSummaryLen {
7✔
686
                summary = summary[:maxCodexSummaryLen] + "..."
×
687
        }
×
688

689
        summary = strings.TrimSpace(summary)
7✔
690
        if summary == "" {
7✔
691
                return
×
692
        }
×
693

694
        r.log.Print("%s findings:", toolName)
7✔
695
        for line := range strings.SplitSeq(summary, "\n") {
14✔
696
                if strings.TrimSpace(line) == "" {
7✔
697
                        continue
×
698
                }
699
                r.log.PrintAligned("  " + line)
7✔
700
        }
701
}
702

703
// ErrUserRejectedPlan is the sentinel error reported when the user rejects the plan draft.
var ErrUserRejectedPlan = errors.New("user rejected plan")
705

706
// draftReviewResult captures the outcome of handling a plan draft review.
type draftReviewResult struct {
	// handled is true if a draft was found and handled
	handled bool
	// feedback carries revision feedback (non-empty only for the "revise" action)
	feedback string
	// err is set if the review failed or the user rejected the draft
	err error
}
712

713
// handlePlanDraft processes PLAN_DRAFT signal if present in output.
714
// returns result indicating whether draft was handled and any feedback/errors.
715
func (r *Runner) handlePlanDraft(ctx context.Context, output string) draftReviewResult {
15✔
716
        planContent, draftErr := ParsePlanDraftPayload(output)
15✔
717
        if draftErr != nil {
24✔
718
                // log malformed signals (but not "no signal" which is expected)
9✔
719
                if !errors.Is(draftErr, ErrNoPlanDraftSignal) {
10✔
720
                        r.log.Print("warning: %v", draftErr)
1✔
721
                }
1✔
722
                return draftReviewResult{handled: false}
9✔
723
        }
724

725
        r.log.Print("plan draft ready for review")
6✔
726

6✔
727
        action, feedback, askErr := r.inputCollector.AskDraftReview(ctx, "Review the plan draft", planContent)
6✔
728
        if askErr != nil {
7✔
729
                return draftReviewResult{handled: true, err: fmt.Errorf("collect draft review: %w", askErr)}
1✔
730
        }
1✔
731

732
        // log the draft review action and feedback to progress file
733
        r.log.LogDraftReview(action, feedback)
5✔
734

5✔
735
        switch action {
5✔
736
        case "accept":
3✔
737
                r.log.Print("draft accepted, continuing to write plan file...")
3✔
738
                return draftReviewResult{handled: true}
3✔
739
        case "revise":
1✔
740
                r.log.Print("revision requested, re-running with feedback...")
1✔
741
                return draftReviewResult{handled: true, feedback: feedback}
1✔
742
        case "reject":
1✔
743
                r.log.Print("plan rejected by user")
1✔
744
                return draftReviewResult{handled: true, err: ErrUserRejectedPlan}
1✔
745
        }
746

747
        return draftReviewResult{handled: true}
×
748
}
749

750
// handlePlanQuestion processes QUESTION signal if present in output.
751
// returns true if question was found and handled, false otherwise.
752
// returns error if question handling failed.
753
func (r *Runner) handlePlanQuestion(ctx context.Context, output string) (bool, error) {
9✔
754
        question, err := ParseQuestionPayload(output)
9✔
755
        if err != nil {
15✔
756
                // log malformed signals (but not "no signal" which is expected)
6✔
757
                if !errors.Is(err, ErrNoQuestionSignal) {
6✔
758
                        r.log.Print("warning: %v", err)
×
759
                }
×
760
                return false, nil
6✔
761
        }
762

763
        r.log.LogQuestion(question.Question, question.Options)
3✔
764

3✔
765
        answer, askErr := r.inputCollector.AskQuestion(ctx, question.Question, question.Options)
3✔
766
        if askErr != nil {
4✔
767
                return true, fmt.Errorf("collect answer: %w", askErr)
1✔
768
        }
1✔
769

770
        r.log.LogAnswer(answer)
2✔
771
        return true, nil
2✔
772
}
773

774
// runPlanCreation executes the interactive plan creation loop.
775
// the loop continues until PLAN_READY signal or max iterations reached.
776
// handles QUESTION signals for Q&A and PLAN_DRAFT signals for draft review.
777
func (r *Runner) runPlanCreation(ctx context.Context) error {
16✔
778
        if r.cfg.PlanDescription == "" {
17✔
779
                return errors.New("plan description required for plan mode")
1✔
780
        }
1✔
781
        if r.inputCollector == nil {
16✔
782
                return errors.New("input collector required for plan mode")
1✔
783
        }
1✔
784

785
        r.log.SetPhase(status.PhasePlan)
14✔
786
        r.log.PrintRaw("starting interactive plan creation\n")
14✔
787
        r.log.Print("plan request: %s", r.cfg.PlanDescription)
14✔
788

14✔
789
        // plan iterations use 20% of max_iterations
14✔
790
        maxPlanIterations := max(minPlanIterations, r.cfg.MaxIterations/planIterationDivisor)
14✔
791

14✔
792
        // track revision feedback for context in next iteration
14✔
793
        var lastRevisionFeedback string
14✔
794

14✔
795
        for i := 1; i <= maxPlanIterations; i++ {
39✔
796
                select {
25✔
797
                case <-ctx.Done():
1✔
798
                        return fmt.Errorf("plan creation: %w", ctx.Err())
1✔
799
                default:
24✔
800
                }
801

802
                r.log.PrintSection(status.NewPlanIterationSection(i))
24✔
803

24✔
804
                prompt := r.buildPlanPrompt()
24✔
805
                // append revision feedback context if present
24✔
806
                if lastRevisionFeedback != "" {
25✔
807
                        prompt = fmt.Sprintf("%s\n\n---\nPREVIOUS DRAFT FEEDBACK:\nUser requested revisions with this feedback:\n%s\n\nPlease revise the plan accordingly and present a new PLAN_DRAFT.", prompt, lastRevisionFeedback)
1✔
808
                        lastRevisionFeedback = "" // clear after use
1✔
809
                }
1✔
810

811
                result := r.claude.Run(ctx, prompt)
24✔
812
                if result.Error != nil {
26✔
813
                        if err := r.handlePatternMatchError(result.Error, "claude"); err != nil {
3✔
814
                                return err
1✔
815
                        }
1✔
816
                        return fmt.Errorf("claude execution: %w", result.Error)
1✔
817
                }
818

819
                if result.Signal == SignalFailed {
23✔
820
                        return errors.New("plan creation failed (FAILED signal received)")
1✔
821
                }
1✔
822

823
                // check for PLAN_READY signal
824
                if IsPlanReady(result.Signal) {
27✔
825
                        r.log.Print("plan creation completed")
6✔
826
                        return nil
6✔
827
                }
6✔
828

829
                // check for PLAN_DRAFT signal - present draft for user review
830
                draftResult := r.handlePlanDraft(ctx, result.Output)
15✔
831
                if draftResult.err != nil {
17✔
832
                        return draftResult.err
2✔
833
                }
2✔
834
                if draftResult.handled {
17✔
835
                        lastRevisionFeedback = draftResult.feedback
4✔
836
                        time.Sleep(r.iterationDelay)
4✔
837
                        continue
4✔
838
                }
839

840
                // check for QUESTION signal
841
                handled, err := r.handlePlanQuestion(ctx, result.Output)
9✔
842
                if err != nil {
10✔
843
                        return err
1✔
844
                }
1✔
845
                if handled {
10✔
846
                        time.Sleep(r.iterationDelay)
2✔
847
                        continue
2✔
848
                }
849

850
                // no question, no draft, and no completion - continue
851
                time.Sleep(r.iterationDelay)
6✔
852
        }
853

854
        return fmt.Errorf("max plan iterations (%d) reached without completion", maxPlanIterations)
1✔
855
}
856

857
// handlePatternMatchError checks if err is a PatternMatchError and logs appropriate messages.
858
// Returns the error if it's a pattern match (to trigger graceful exit), nil otherwise.
859
func (r *Runner) handlePatternMatchError(err error, tool string) error {
8✔
860
        var patternErr *executor.PatternMatchError
8✔
861
        if errors.As(err, &patternErr) {
12✔
862
                r.log.Print("error: detected %q in %s output", patternErr.Pattern, tool)
4✔
863
                r.log.Print("run '%s' for more information", patternErr.HelpCmd)
4✔
864
                return err
4✔
865
        }
4✔
866
        return nil
4✔
867
}
868

869
// runFinalize executes the optional finalize step after successful reviews.
870
// runs once, best-effort: failures are logged but don't block success.
871
// exception: context cancellation is propagated (user wants to abort).
872
func (r *Runner) runFinalize(ctx context.Context) error {
22✔
873
        if !r.cfg.FinalizeEnabled {
36✔
874
                return nil
14✔
875
        }
14✔
876

877
        r.log.SetPhase(status.PhaseFinalize)
8✔
878
        r.log.PrintSection(status.NewGenericSection("finalize step"))
8✔
879

8✔
880
        prompt := r.replacePromptVariables(r.cfg.AppConfig.FinalizePrompt)
8✔
881
        result := r.claude.Run(ctx, prompt)
8✔
882

8✔
883
        if result.Error != nil {
10✔
884
                // propagate context cancellation - user wants to abort
2✔
885
                if errors.Is(result.Error, context.Canceled) || errors.Is(result.Error, context.DeadlineExceeded) {
3✔
886
                        return fmt.Errorf("finalize step: %w", result.Error)
1✔
887
                }
1✔
888
                // pattern match (rate limit) - log via shared helper, but don't fail (best-effort)
889
                if r.handlePatternMatchError(result.Error, "claude") != nil {
1✔
890
                        return nil //nolint:nilerr // intentional: best-effort semantics, log but don't propagate
×
891
                }
×
892
                // best-effort: log error but don't fail
893
                r.log.Print("finalize step failed: %v", result.Error)
1✔
894
                return nil
1✔
895
        }
896

897
        if result.Signal == SignalFailed {
7✔
898
                r.log.Print("finalize step reported failure (non-blocking)")
1✔
899
                return nil
1✔
900
        }
1✔
901

902
        r.log.Print("finalize step completed")
5✔
903
        return nil
5✔
904
}
905

906
// needsCodexBinary returns true if the current configuration requires the codex binary.
907
// returns false when external_review_tool is "custom" or "none", since codex isn't used.
908
func needsCodexBinary(appConfig *config.Config) bool {
3✔
909
        if appConfig == nil {
3✔
910
                return true // default behavior assumes codex
×
911
        }
×
912
        switch appConfig.ExternalReviewTool {
3✔
913
        case "custom", "none":
2✔
914
                return false
2✔
915
        default:
1✔
916
                return true // "codex" or empty (default) requires codex binary
1✔
917
        }
918
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc