• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

umputun / ralphex / 21725698798

05 Feb 2026 07:35PM UTC coverage: 80.789% (+0.4%) from 80.435%
21725698798

Pull #67

github

umputun
fix: strengthen codex eval prompt to prevent premature signal

claude was emitting CODEX_REVIEW_DONE after fixing issues instead of
stopping to let codex verify fixes. add explicit instructions that
the signal must only be emitted when codex reports no findings.
Pull Request #67: feat: custom external review support

222 of 252 new or added lines in 9 files covered. (88.1%)

2 existing lines in 1 file now uncovered.

4525 of 5601 relevant lines covered (80.79%)

152.02 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

85.77
/pkg/processor/runner.go
1
// Package processor provides the main orchestration loop for ralphex execution.
2
package processor
3

4
import (
5
        "context"
6
        "errors"
7
        "fmt"
8
        "os"
9
        "os/exec"
10
        "strings"
11
        "time"
12

13
        "github.com/umputun/ralphex/pkg/config"
14
        "github.com/umputun/ralphex/pkg/executor"
15
)
16

17
// DefaultIterationDelay is the pause between iterations to allow system to settle.
// NewWithExecutors uses it whenever Config.IterationDelayMs is not a positive value.
18
const DefaultIterationDelay = 2 * time.Second
19

20
// Mode represents the execution mode.
// Runner.Run dispatches on this value and returns an "unknown mode" error
// for anything other than the constants declared below.
21
type Mode string
22

23
// Execution modes accepted by Runner.Run.
const (
24
        ModeFull      Mode = "full"       // full execution: tasks + reviews + codex
25
        ModeReview    Mode = "review"     // skip tasks, run full review pipeline
26
        ModeCodexOnly Mode = "codex-only" // skip tasks and first review, run only codex loop
27
        ModeTasksOnly Mode = "tasks-only" // run only task phase, skip all reviews
28
        ModePlan      Mode = "plan"       // interactive plan creation mode
29
)
30

31
// Config holds runner configuration.
// New and NewWithExecutors take it by value and the Runner stores its own copy.
32
type Config struct {
33
        PlanFile         string         // path to plan file (required for full and tasks-only modes)
34
        PlanDescription  string         // plan description for interactive plan creation mode
35
        ProgressPath     string         // path to progress file
36
        Mode             Mode           // execution mode
37
        MaxIterations    int            // maximum iterations for task phase; review loops use a fraction of it (min 3)
38
        Debug            bool           // enable debug output
39
        NoColor          bool           // disable color output
40
        IterationDelayMs int            // delay between iterations in milliseconds (<= 0 means DefaultIterationDelay)
41
        TaskRetryCount   int            // number of times to retry failed tasks (see NewWithExecutors for defaulting)
42
        CodexEnabled     bool           // whether codex review is enabled (false disables external review entirely)
43
        FinalizeEnabled  bool           // whether finalize step is enabled
44
        DefaultBranch    string         // default branch name (detected from repo)
45
        AppConfig        *config.Config // full application config (for executors and prompts)
46
}
47

48
//go:generate moq -out mocks/executor.go -pkg mocks -skip-ensure -fmt goimports . Executor
49
//go:generate moq -out mocks/logger.go -pkg mocks -skip-ensure -fmt goimports . Logger
50
//go:generate moq -out mocks/input_collector.go -pkg mocks -skip-ensure -fmt goimports . InputCollector
51

52
// Executor runs CLI commands and returns results.
// The runner reads the Error, Signal and Output fields of the returned executor.Result.
53
type Executor interface {
54
        Run(ctx context.Context, prompt string) executor.Result
55
}
56

57
// Logger provides logging functionality.
// Print is called with a format string plus arguments (for example "%s findings:");
// PrintRaw has the same signature.
58
type Logger interface {
59
        SetPhase(phase Phase)
60
        Print(format string, args ...any)
61
        PrintRaw(format string, args ...any)
62
        PrintSection(section Section)
63
        PrintAligned(text string)
64
        LogQuestion(question string, options []string)
65
        LogAnswer(answer string)
66
        LogDraftReview(action string, feedback string)
67
        Path() string
68
}
69

70
// InputCollector provides interactive input collection for plan creation.
// handlePlanDraft reacts to the AskDraftReview actions "accept", "revise" and "reject";
// any other action is treated as handled without feedback.
71
type InputCollector interface {
72
        AskQuestion(ctx context.Context, question string, options []string) (string, error)
73
        AskDraftReview(ctx context.Context, question string, planContent string) (action string, feedback string, err error)
74
}
75

76
// Runner orchestrates the execution loop.
// Instances are built by New or NewWithExecutors; Run selects the pipeline from cfg.Mode.
77
type Runner struct {
78
        cfg            Config // copy of the configuration passed to the constructor
79
        log            Logger // sink for all progress output
80
        claude         Executor // runs task, review and evaluation prompts
81
        codex          Executor // runs the default external review
82
        custom         *executor.CustomExecutor // custom review executor; nil when New finds no custom review script
83
        inputCollector InputCollector // set via SetInputCollector, used for plan draft review
84
        iterationDelay time.Duration // pause between loop iterations
85
        taskRetryCount int // allowed retries after a FAILED task signal
86
}
87

88
// New creates a new Runner with the given configuration.
89
// If codex is enabled but the binary is not found in PATH, it is automatically disabled with a warning.
90
func New(cfg Config, log Logger) *Runner {
3✔
91
        // build claude executor with config values
3✔
92
        claudeExec := &executor.ClaudeExecutor{
3✔
93
                OutputHandler: func(text string) {
3✔
94
                        log.PrintAligned(text)
×
95
                },
×
96
                Debug: cfg.Debug,
97
        }
98
        if cfg.AppConfig != nil {
6✔
99
                claudeExec.Command = cfg.AppConfig.ClaudeCommand
3✔
100
                claudeExec.Args = cfg.AppConfig.ClaudeArgs
3✔
101
                claudeExec.ErrorPatterns = cfg.AppConfig.ClaudeErrorPatterns
3✔
102
        }
3✔
103

104
        // build codex executor with config values
105
        codexExec := &executor.CodexExecutor{
3✔
106
                OutputHandler: func(text string) {
3✔
107
                        log.PrintAligned(text)
×
108
                },
×
109
                Debug: cfg.Debug,
110
        }
111
        if cfg.AppConfig != nil {
6✔
112
                codexExec.Command = cfg.AppConfig.CodexCommand
3✔
113
                codexExec.Model = cfg.AppConfig.CodexModel
3✔
114
                codexExec.ReasoningEffort = cfg.AppConfig.CodexReasoningEffort
3✔
115
                codexExec.TimeoutMs = cfg.AppConfig.CodexTimeoutMs
3✔
116
                codexExec.Sandbox = cfg.AppConfig.CodexSandbox
3✔
117
                codexExec.ErrorPatterns = cfg.AppConfig.CodexErrorPatterns
3✔
118
        }
3✔
119

120
        // build custom executor if custom review script is configured
121
        var customExec *executor.CustomExecutor
3✔
122
        if cfg.AppConfig != nil && cfg.AppConfig.CustomReviewScript != "" {
4✔
123
                customExec = &executor.CustomExecutor{
1✔
124
                        Script: cfg.AppConfig.CustomReviewScript,
1✔
125
                        OutputHandler: func(text string) {
1✔
NEW
126
                                log.PrintAligned(text)
×
NEW
127
                        },
×
128
                        ErrorPatterns: cfg.AppConfig.CodexErrorPatterns, // reuse codex error patterns
129
                }
130
        }
131

132
        // auto-disable codex if the binary is not installed AND we need codex
133
        // (skip this check if using custom external review tool or external review is disabled)
134
        if cfg.CodexEnabled && needsCodexBinary(cfg.AppConfig) {
4✔
135
                codexCmd := codexExec.Command
1✔
136
                if codexCmd == "" {
1✔
137
                        codexCmd = "codex"
×
138
                }
×
139
                if _, err := exec.LookPath(codexCmd); err != nil {
2✔
140
                        log.Print("warning: codex not found (%s: %v), disabling codex review phase", codexCmd, err)
1✔
141
                        cfg.CodexEnabled = false
1✔
142
                }
1✔
143
        }
144

145
        return NewWithExecutors(cfg, log, claudeExec, codexExec, customExec)
3✔
146
}
147

148
// NewWithExecutors creates a new Runner with custom executors (for testing).
149
func NewWithExecutors(cfg Config, log Logger, claude, codex Executor, custom *executor.CustomExecutor) *Runner {
62✔
150
        // determine iteration delay from config or default
62✔
151
        iterDelay := DefaultIterationDelay
62✔
152
        if cfg.IterationDelayMs > 0 {
78✔
153
                iterDelay = time.Duration(cfg.IterationDelayMs) * time.Millisecond
16✔
154
        }
16✔
155

156
        // determine task retry count from config
157
        // appConfig.TaskRetryCountSet means user explicitly set it (even to 0 for no retries)
158
        retryCount := 1
62✔
159
        if cfg.AppConfig != nil && cfg.AppConfig.TaskRetryCountSet {
112✔
160
                retryCount = cfg.TaskRetryCount
50✔
161
        } else if cfg.TaskRetryCount > 0 {
63✔
162
                retryCount = cfg.TaskRetryCount
1✔
163
        }
1✔
164

165
        return &Runner{
62✔
166
                cfg:            cfg,
62✔
167
                log:            log,
62✔
168
                claude:         claude,
62✔
169
                codex:          codex,
62✔
170
                custom:         custom,
62✔
171
                iterationDelay: iterDelay,
62✔
172
                taskRetryCount: retryCount,
62✔
173
        }
62✔
174
}
175

176
// SetInputCollector sets the input collector for plan creation mode.
177
func (r *Runner) SetInputCollector(c InputCollector) {
15✔
178
        r.inputCollector = c
15✔
179
}
15✔
180

181
// Run executes the main loop based on configured mode.
182
func (r *Runner) Run(ctx context.Context) error {
50✔
183
        switch r.cfg.Mode {
50✔
184
        case ModeFull:
13✔
185
                return r.runFull(ctx)
13✔
186
        case ModeReview:
7✔
187
                return r.runReviewOnly(ctx)
7✔
188
        case ModeCodexOnly:
9✔
189
                return r.runCodexOnly(ctx)
9✔
190
        case ModeTasksOnly:
4✔
191
                return r.runTasksOnly(ctx)
4✔
192
        case ModePlan:
16✔
193
                return r.runPlanCreation(ctx)
16✔
194
        default:
1✔
195
                return fmt.Errorf("unknown mode: %s", r.cfg.Mode)
1✔
196
        }
197
}
198

199
// runFull executes the complete pipeline: tasks → review → codex → review.
200
func (r *Runner) runFull(ctx context.Context) error {
13✔
201
        if r.cfg.PlanFile == "" {
14✔
202
                return errors.New("plan file required for full mode")
1✔
203
        }
1✔
204

205
        // phase 1: task execution
206
        r.log.SetPhase(PhaseTask)
12✔
207
        r.log.PrintRaw("starting task execution phase\n")
12✔
208

12✔
209
        if err := r.runTaskPhase(ctx); err != nil {
18✔
210
                return fmt.Errorf("task phase: %w", err)
6✔
211
        }
6✔
212

213
        // phase 2: first review pass - address ALL findings
214
        r.log.SetPhase(PhaseReview)
6✔
215
        r.log.PrintSection(NewGenericSection("claude review 0: all findings"))
6✔
216

6✔
217
        if err := r.runClaudeReview(ctx, r.replacePromptVariables(r.cfg.AppConfig.ReviewFirstPrompt)); err != nil {
6✔
218
                return fmt.Errorf("first review: %w", err)
×
219
        }
×
220

221
        // phase 2.1: claude review loop (critical/major) before codex
222
        if err := r.runClaudeReviewLoop(ctx); err != nil {
6✔
223
                return fmt.Errorf("pre-codex review loop: %w", err)
×
224
        }
×
225

226
        // phase 2.5: codex external review loop
227
        r.log.SetPhase(PhaseCodex)
6✔
228
        r.log.PrintSection(NewGenericSection("codex external review"))
6✔
229

6✔
230
        if err := r.runCodexLoop(ctx); err != nil {
6✔
231
                return fmt.Errorf("codex loop: %w", err)
×
232
        }
×
233

234
        // phase 3: claude review loop (critical/major) after codex
235
        r.log.SetPhase(PhaseReview)
6✔
236

6✔
237
        if err := r.runClaudeReviewLoop(ctx); err != nil {
6✔
238
                return fmt.Errorf("post-codex review loop: %w", err)
×
239
        }
×
240

241
        // optional finalize step (best-effort, but propagates context cancellation)
242
        if err := r.runFinalize(ctx); err != nil {
6✔
243
                return err
×
244
        }
×
245

246
        r.log.Print("all phases completed successfully")
6✔
247
        return nil
6✔
248
}
249

250
// runReviewOnly executes only the review pipeline: review → codex → review.
251
func (r *Runner) runReviewOnly(ctx context.Context) error {
7✔
252
        // phase 1: first review
7✔
253
        r.log.SetPhase(PhaseReview)
7✔
254
        r.log.PrintSection(NewGenericSection("claude review 0: all findings"))
7✔
255

7✔
256
        if err := r.runClaudeReview(ctx, r.replacePromptVariables(r.cfg.AppConfig.ReviewFirstPrompt)); err != nil {
8✔
257
                return fmt.Errorf("first review: %w", err)
1✔
258
        }
1✔
259

260
        // phase 1.1: claude review loop (critical/major) before codex
261
        if err := r.runClaudeReviewLoop(ctx); err != nil {
7✔
262
                return fmt.Errorf("pre-codex review loop: %w", err)
1✔
263
        }
1✔
264

265
        // phase 2: codex external review loop
266
        r.log.SetPhase(PhaseCodex)
5✔
267
        r.log.PrintSection(NewGenericSection("codex external review"))
5✔
268

5✔
269
        if err := r.runCodexLoop(ctx); err != nil {
7✔
270
                return fmt.Errorf("codex loop: %w", err)
2✔
271
        }
2✔
272

273
        // phase 3: claude review loop (critical/major) after codex
274
        r.log.SetPhase(PhaseReview)
3✔
275

3✔
276
        if err := r.runClaudeReviewLoop(ctx); err != nil {
3✔
277
                return fmt.Errorf("post-codex review loop: %w", err)
×
278
        }
×
279

280
        // optional finalize step (best-effort, but propagates context cancellation)
281
        if err := r.runFinalize(ctx); err != nil {
4✔
282
                return err
1✔
283
        }
1✔
284

285
        r.log.Print("review phases completed successfully")
2✔
286
        return nil
2✔
287
}
288

289
// runCodexOnly executes only the codex pipeline: codex → review.
290
func (r *Runner) runCodexOnly(ctx context.Context) error {
9✔
291
        // phase 1: codex external review loop
9✔
292
        r.log.SetPhase(PhaseCodex)
9✔
293
        r.log.PrintSection(NewGenericSection("codex external review"))
9✔
294

9✔
295
        if err := r.runCodexLoop(ctx); err != nil {
10✔
296
                return fmt.Errorf("codex loop: %w", err)
1✔
297
        }
1✔
298

299
        // phase 2: claude review loop (critical/major) after codex
300
        r.log.SetPhase(PhaseReview)
8✔
301

8✔
302
        if err := r.runClaudeReviewLoop(ctx); err != nil {
8✔
303
                return fmt.Errorf("post-codex review loop: %w", err)
×
304
        }
×
305

306
        // optional finalize step (best-effort, but propagates context cancellation)
307
        if err := r.runFinalize(ctx); err != nil {
8✔
308
                return err
×
309
        }
×
310

311
        r.log.Print("codex phases completed successfully")
8✔
312
        return nil
8✔
313
}
314

315
// runTasksOnly executes only task phase, skipping all reviews.
316
func (r *Runner) runTasksOnly(ctx context.Context) error {
4✔
317
        if r.cfg.PlanFile == "" {
5✔
318
                return errors.New("plan file required for tasks-only mode")
1✔
319
        }
1✔
320

321
        r.log.SetPhase(PhaseTask)
3✔
322
        r.log.PrintRaw("starting task execution phase\n")
3✔
323

3✔
324
        if err := r.runTaskPhase(ctx); err != nil {
4✔
325
                return fmt.Errorf("task phase: %w", err)
1✔
326
        }
1✔
327

328
        r.log.Print("task execution completed successfully")
2✔
329
        return nil
2✔
330
}
331

332
// runTaskPhase executes tasks until completion or max iterations.
333
// executes ONE Task section per iteration.
334
func (r *Runner) runTaskPhase(ctx context.Context) error {
15✔
335
        prompt := r.replacePromptVariables(r.cfg.AppConfig.TaskPrompt)
15✔
336
        retryCount := 0
15✔
337

15✔
338
        for i := 1; i <= r.cfg.MaxIterations; i++ {
34✔
339
                select {
19✔
340
                case <-ctx.Done():
1✔
341
                        return fmt.Errorf("task phase: %w", ctx.Err())
1✔
342
                default:
18✔
343
                }
344

345
                r.log.PrintSection(NewTaskIterationSection(i))
18✔
346

18✔
347
                result := r.claude.Run(ctx, prompt)
18✔
348
                if result.Error != nil {
20✔
349
                        if err := r.handlePatternMatchError(result.Error, "claude"); err != nil {
3✔
350
                                return err
1✔
351
                        }
1✔
352
                        return fmt.Errorf("claude execution: %w", result.Error)
1✔
353
                }
354

355
                if result.Signal == SignalCompleted {
24✔
356
                        // verify plan actually has no uncompleted checkboxes
8✔
357
                        if r.hasUncompletedTasks() {
8✔
358
                                r.log.Print("warning: completion signal received but plan still has [ ] items, continuing...")
×
359
                                continue
×
360
                        }
361
                        r.log.PrintRaw("\nall tasks completed, starting code review...\n")
8✔
362
                        return nil
8✔
363
                }
364

365
                if result.Signal == SignalFailed {
13✔
366
                        if retryCount < r.taskRetryCount {
7✔
367
                                r.log.Print("task failed, retrying...")
2✔
368
                                retryCount++
2✔
369
                                time.Sleep(r.iterationDelay)
2✔
370
                                continue
2✔
371
                        }
372
                        return errors.New("task execution failed after retry (FAILED signal received)")
3✔
373
                }
374

375
                retryCount = 0
3✔
376
                // continue with same prompt - it reads from plan file each time
3✔
377
                time.Sleep(r.iterationDelay)
3✔
378
        }
379

380
        return fmt.Errorf("max iterations (%d) reached without completion", r.cfg.MaxIterations)
1✔
381
}
382

383
// runClaudeReview runs Claude review with the given prompt until REVIEW_DONE.
384
func (r *Runner) runClaudeReview(ctx context.Context, prompt string) error {
13✔
385
        result := r.claude.Run(ctx, prompt)
13✔
386
        if result.Error != nil {
13✔
387
                if err := r.handlePatternMatchError(result.Error, "claude"); err != nil {
×
388
                        return err
×
389
                }
×
390
                return fmt.Errorf("claude execution: %w", result.Error)
×
391
        }
392

393
        if result.Signal == SignalFailed {
14✔
394
                return errors.New("review failed (FAILED signal received)")
1✔
395
        }
1✔
396

397
        if !IsReviewDone(result.Signal) {
12✔
398
                r.log.Print("warning: first review pass did not complete cleanly, continuing...")
×
399
        }
×
400

401
        return nil
12✔
402
}
403

404
// runClaudeReviewLoop runs claude review iterations using second review prompt.
405
func (r *Runner) runClaudeReviewLoop(ctx context.Context) error {
29✔
406
        // review iterations = 10% of max_iterations (min 3)
29✔
407
        maxReviewIterations := max(3, r.cfg.MaxIterations/10)
29✔
408

29✔
409
        for i := 1; i <= maxReviewIterations; i++ {
58✔
410
                select {
29✔
411
                case <-ctx.Done():
×
412
                        return fmt.Errorf("review: %w", ctx.Err())
×
413
                default:
29✔
414
                }
415

416
                r.log.PrintSection(NewClaudeReviewSection(i, ": critical/major"))
29✔
417

29✔
418
                result := r.claude.Run(ctx, r.replacePromptVariables(r.cfg.AppConfig.ReviewSecondPrompt))
29✔
419
                if result.Error != nil {
30✔
420
                        if err := r.handlePatternMatchError(result.Error, "claude"); err != nil {
2✔
421
                                return err
1✔
422
                        }
1✔
423
                        return fmt.Errorf("claude execution: %w", result.Error)
×
424
                }
425

426
                if result.Signal == SignalFailed {
28✔
427
                        return errors.New("review failed (FAILED signal received)")
×
428
                }
×
429

430
                if IsReviewDone(result.Signal) {
56✔
431
                        r.log.Print("claude review complete - no more findings")
28✔
432
                        return nil
28✔
433
                }
28✔
434

435
                r.log.Print("issues fixed, running another review iteration...")
×
436
                time.Sleep(r.iterationDelay)
×
437
        }
438

439
        r.log.Print("max claude review iterations reached, continuing...")
×
440
        return nil
×
441
}
442

443
// externalReviewTool returns the effective external review tool to use.
444
// handles backward compatibility: codex_enabled = false → "none"
445
// the CodexEnabled flag takes precedence for backward compatibility.
446
func (r *Runner) externalReviewTool() string {
20✔
447
        // backward compatibility: codex_enabled = false means no external review
20✔
448
        // this takes precedence over external_review_tool setting
20✔
449
        if !r.cfg.CodexEnabled {
29✔
450
                return "none"
9✔
451
        }
9✔
452

453
        // check explicit external_review_tool setting
454
        if r.cfg.AppConfig != nil && r.cfg.AppConfig.ExternalReviewTool != "" {
22✔
455
                return r.cfg.AppConfig.ExternalReviewTool
11✔
456
        }
11✔
457

458
        // default to codex
NEW
459
        return "codex"
×
460
}
461

462
// runCodexLoop runs the external review loop (codex or custom) until no findings.
463
func (r *Runner) runCodexLoop(ctx context.Context) error {
20✔
464
        tool := r.externalReviewTool()
20✔
465

20✔
466
        // skip external review phase if disabled
20✔
467
        if tool == "none" {
30✔
468
                r.log.Print("external review disabled, skipping...")
10✔
469
                return nil
10✔
470
        }
10✔
471

472
        // custom review tool
473
        if tool == "custom" {
12✔
474
                if r.custom == nil {
3✔
475
                        return errors.New("custom review script not configured")
1✔
476
                }
1✔
477
                return r.runExternalReviewLoop(ctx, externalReviewConfig{
1✔
478
                        name:           "custom",
1✔
479
                        runReview:      func(ctx context.Context, prompt string) executor.Result { return r.custom.Run(ctx, prompt) },
2✔
480
                        buildPrompt:    r.buildCustomReviewPrompt,
481
                        buildEvalPrompt: r.buildCustomEvaluationPrompt,
482
                        showSummary:    r.showCustomSummary,
483
                        makeSection:    NewCustomIterationSection,
484
                })
485
        }
486

487
        // default: codex review
488
        return r.runExternalReviewLoop(ctx, externalReviewConfig{
8✔
489
                name:           "codex",
8✔
490
                runReview:      r.codex.Run,
8✔
491
                buildPrompt:    r.buildCodexPrompt,
8✔
492
                buildEvalPrompt: r.buildCodexEvaluationPrompt,
8✔
493
                showSummary:    r.showCodexSummary,
8✔
494
                makeSection:    NewCodexIterationSection,
8✔
495
        })
8✔
496
}
497

498
// externalReviewConfig holds callbacks for running an external review tool.
499
type externalReviewConfig struct {
500
        name           string                                                     // tool name for error messages
501
        runReview      func(ctx context.Context, prompt string) executor.Result   // run the external review tool
502
        buildPrompt    func(isFirst bool, claudeResponse string) string           // build prompt for review tool
503
        buildEvalPrompt func(output string) string                                // build evaluation prompt for claude
504
        showSummary    func(output string)                                        // display review findings summary
505
        makeSection    func(iteration int) Section                                // create section header
506
}
507

508
// runExternalReviewLoop runs a generic external review tool-claude loop until no findings.
509
func (r *Runner) runExternalReviewLoop(ctx context.Context, cfg externalReviewConfig) error {
9✔
510
        // iterations = 20% of max_iterations (min 3)
9✔
511
        maxIterations := max(3, r.cfg.MaxIterations/5)
9✔
512

9✔
513
        var claudeResponse string // first iteration has no prior response
9✔
514

9✔
515
        for i := 1; i <= maxIterations; i++ {
18✔
516
                select {
9✔
517
                case <-ctx.Done():
×
NEW
518
                        return fmt.Errorf("%s loop: %w", cfg.name, ctx.Err())
×
519
                default:
9✔
520
                }
521

522
                r.log.PrintSection(cfg.makeSection(i))
9✔
523

9✔
524
                // run external review tool
9✔
525
                reviewResult := cfg.runReview(ctx, cfg.buildPrompt(i == 1, claudeResponse))
9✔
526
                if reviewResult.Error != nil {
11✔
527
                        if err := r.handlePatternMatchError(reviewResult.Error, cfg.name); err != nil {
3✔
528
                                return err
1✔
529
                        }
1✔
530
                        return fmt.Errorf("%s execution: %w", cfg.name, reviewResult.Error)
1✔
531
                }
532

533
                if reviewResult.Output == "" {
9✔
534
                        r.log.Print("%s review returned no output, skipping...", cfg.name)
2✔
535
                        break
2✔
536
                }
537

538
                // show findings summary before Claude evaluation
539
                cfg.showSummary(reviewResult.Output)
5✔
540

5✔
541
                // pass output to claude for evaluation and fixing
5✔
542
                r.log.SetPhase(PhaseClaudeEval)
5✔
543
                r.log.PrintSection(NewClaudeEvalSection())
5✔
544
                claudeResult := r.claude.Run(ctx, cfg.buildEvalPrompt(reviewResult.Output))
5✔
545

5✔
546
                // restore codex phase for next iteration
5✔
547
                r.log.SetPhase(PhaseCodex)
5✔
548
                if claudeResult.Error != nil {
5✔
549
                        if err := r.handlePatternMatchError(claudeResult.Error, "claude"); err != nil {
×
550
                                return err
×
551
                        }
×
552
                        return fmt.Errorf("claude execution: %w", claudeResult.Error)
×
553
                }
554

555
                claudeResponse = claudeResult.Output
5✔
556

5✔
557
                // exit only when claude sees "no findings"
5✔
558
                if IsCodexDone(claudeResult.Signal) {
10✔
559
                        r.log.Print("%s review complete - no more findings", cfg.name)
5✔
560
                        return nil
5✔
561
                }
5✔
562

563
                time.Sleep(r.iterationDelay)
×
564
        }
565

566
        r.log.Print("max %s iterations reached, continuing to next phase...", cfg.name)
2✔
567
        return nil
2✔
568
}
569

570
// buildCodexPrompt creates the prompt for codex review.
571
func (r *Runner) buildCodexPrompt(isFirst bool, claudeResponse string) string {
9✔
572
        // build plan context if available
9✔
573
        planContext := ""
9✔
574
        if r.cfg.PlanFile != "" {
12✔
575
                planContext = fmt.Sprintf(`
3✔
576
## Plan Context
3✔
577
The code implements the plan at: %s
3✔
578

3✔
579
---
3✔
580
`, r.resolvePlanFilePath())
3✔
581
        }
3✔
582

583
        // different diff command based on iteration
584
        var diffInstruction, diffDescription string
9✔
585
        if isFirst {
18✔
586
                defaultBranch := r.getDefaultBranch()
9✔
587
                diffInstruction = fmt.Sprintf("Run: git diff %s...HEAD", defaultBranch)
9✔
588
                diffDescription = fmt.Sprintf("code changes between %s and HEAD branch", defaultBranch)
9✔
589
        } else {
9✔
590
                diffInstruction = "Run: git diff"
×
591
                diffDescription = "uncommitted changes (Claude's fixes from previous iteration)"
×
592
        }
×
593

594
        basePrompt := fmt.Sprintf(`%sReview the %s.
9✔
595

9✔
596
%s
9✔
597

9✔
598
Analyze for:
9✔
599
- Bugs and logic errors
9✔
600
- Security vulnerabilities
9✔
601
- Race conditions
9✔
602
- Error handling gaps
9✔
603
- Code quality issues
9✔
604

9✔
605
Report findings with file:line references. If no issues found, say "NO ISSUES FOUND".`, planContext, diffDescription, diffInstruction)
9✔
606

9✔
607
        if claudeResponse != "" {
9✔
608
                return fmt.Sprintf(`%s
×
609

×
610
---
×
611
PREVIOUS REVIEW CONTEXT:
×
612
Claude (previous reviewer) responded to your findings:
×
613

×
614
%s
×
615

×
616
Re-evaluate considering Claude's arguments. If Claude's fixes are correct, acknowledge them.
×
617
If Claude's arguments are invalid, explain why the issues still exist.`, basePrompt, claudeResponse)
×
618
        }
×
619

620
        return basePrompt
9✔
621
}
622

623
// hasUncompletedTasks checks if plan file has any uncompleted checkboxes.
624
func (r *Runner) hasUncompletedTasks() bool {
13✔
625
        content, err := os.ReadFile(r.resolvePlanFilePath())
13✔
626
        if err != nil {
13✔
627
                return true // assume incomplete if can't read
×
628
        }
×
629

630
        // look for uncompleted checkbox pattern: [ ] (not [x])
631
        for line := range strings.SplitSeq(string(content), "\n") {
43✔
632
                trimmed := strings.TrimSpace(line)
30✔
633
                if strings.HasPrefix(trimmed, "- [ ]") {
33✔
634
                        return true
3✔
635
                }
3✔
636
        }
637
        return false
10✔
638
}
639

640
// showCodexSummary displays a condensed summary of codex output before Claude evaluation.
641
// extracts text until first code block or 500 chars, whichever is shorter.
642
func (r *Runner) showCodexSummary(output string) {
4✔
643
        r.showExternalReviewSummary("codex", output)
4✔
644
}
4✔
645

646
// showCustomSummary displays a condensed summary of custom review output before Claude evaluation.
647
func (r *Runner) showCustomSummary(output string) {
1✔
648
        r.showExternalReviewSummary("custom", output)
1✔
649
}
1✔
650

651
// showExternalReviewSummary displays a condensed summary of external review output.
652
// extracts text until first code block or 5000 chars, whichever is shorter.
653
func (r *Runner) showExternalReviewSummary(toolName, output string) {
5✔
654
        summary := output
5✔
655

5✔
656
        // trim to first code block if present
5✔
657
        if idx := strings.Index(summary, "```"); idx > 0 {
5✔
658
                summary = summary[:idx]
×
659
        }
×
660

661
        // limit to 5000 chars
662
        if len(summary) > 5000 {
5✔
663
                summary = summary[:5000] + "..."
×
664
        }
×
665

666
        summary = strings.TrimSpace(summary)
5✔
667
        if summary == "" {
5✔
668
                return
×
669
        }
×
670

671
        r.log.Print("%s findings:", toolName)
5✔
672
        for line := range strings.SplitSeq(summary, "\n") {
10✔
673
                if strings.TrimSpace(line) == "" {
5✔
674
                        continue
×
675
                }
676
                r.log.PrintAligned("  " + line)
5✔
677
        }
678
}
679

680
// ErrUserRejectedPlan is returned when user rejects the plan draft.
// handlePlanDraft stores it in draftReviewResult.err when the review action is "reject".
681
var ErrUserRejectedPlan = errors.New("user rejected plan")
682

683
// draftReviewResult holds the result of draft review handling.
// handlePlanDraft returns the zero value (handled == false) when the output carries
// no usable plan draft signal.
684
type draftReviewResult struct {
685
        handled  bool   // true if draft was found and handled
686
        feedback string // revision feedback (non-empty only for "revise" action)
687
        err      error  // error if review failed or user rejected
688
}
689

690
// handlePlanDraft processes PLAN_DRAFT signal if present in output.
691
// returns result indicating whether draft was handled and any feedback/errors.
692
func (r *Runner) handlePlanDraft(ctx context.Context, output string) draftReviewResult {
15✔
693
        planContent, draftErr := ParsePlanDraftPayload(output)
15✔
694
        if draftErr != nil {
24✔
695
                // log malformed signals (but not "no signal" which is expected)
9✔
696
                if !errors.Is(draftErr, ErrNoPlanDraftSignal) {
10✔
697
                        r.log.Print("warning: %v", draftErr)
1✔
698
                }
1✔
699
                return draftReviewResult{handled: false}
9✔
700
        }
701

702
        r.log.Print("plan draft ready for review")
6✔
703

6✔
704
        action, feedback, askErr := r.inputCollector.AskDraftReview(ctx, "Review the plan draft", planContent)
6✔
705
        if askErr != nil {
7✔
706
                return draftReviewResult{handled: true, err: fmt.Errorf("collect draft review: %w", askErr)}
1✔
707
        }
1✔
708

709
        // log the draft review action and feedback to progress file
710
        r.log.LogDraftReview(action, feedback)
5✔
711

5✔
712
        switch action {
5✔
713
        case "accept":
3✔
714
                r.log.Print("draft accepted, continuing to write plan file...")
3✔
715
                return draftReviewResult{handled: true}
3✔
716
        case "revise":
1✔
717
                r.log.Print("revision requested, re-running with feedback...")
1✔
718
                return draftReviewResult{handled: true, feedback: feedback}
1✔
719
        case "reject":
1✔
720
                r.log.Print("plan rejected by user")
1✔
721
                return draftReviewResult{handled: true, err: ErrUserRejectedPlan}
1✔
722
        }
723

724
        return draftReviewResult{handled: true}
×
725
}
726

727
// handlePlanQuestion processes QUESTION signal if present in output.
728
// returns true if question was found and handled, false otherwise.
729
// returns error if question handling failed.
730
func (r *Runner) handlePlanQuestion(ctx context.Context, output string) (bool, error) {
9✔
731
        question, err := ParseQuestionPayload(output)
9✔
732
        if err != nil {
15✔
733
                // log malformed signals (but not "no signal" which is expected)
6✔
734
                if !errors.Is(err, ErrNoQuestionSignal) {
6✔
735
                        r.log.Print("warning: %v", err)
×
736
                }
×
737
                return false, nil
6✔
738
        }
739

740
        r.log.LogQuestion(question.Question, question.Options)
3✔
741

3✔
742
        answer, askErr := r.inputCollector.AskQuestion(ctx, question.Question, question.Options)
3✔
743
        if askErr != nil {
4✔
744
                return true, fmt.Errorf("collect answer: %w", askErr)
1✔
745
        }
1✔
746

747
        r.log.LogAnswer(answer)
2✔
748
        return true, nil
2✔
749
}
750

751
// runPlanCreation executes the interactive plan creation loop.
752
// the loop continues until PLAN_READY signal or max iterations reached.
753
// handles QUESTION signals for Q&A and PLAN_DRAFT signals for draft review.
754
func (r *Runner) runPlanCreation(ctx context.Context) error {
16✔
755
        if r.cfg.PlanDescription == "" {
17✔
756
                return errors.New("plan description required for plan mode")
1✔
757
        }
1✔
758
        if r.inputCollector == nil {
16✔
759
                return errors.New("input collector required for plan mode")
1✔
760
        }
1✔
761

762
        r.log.SetPhase(PhasePlan)
14✔
763
        r.log.PrintRaw("starting interactive plan creation\n")
14✔
764
        r.log.Print("plan request: %s", r.cfg.PlanDescription)
14✔
765

14✔
766
        // plan iterations use 20% of max_iterations (min 5)
14✔
767
        maxPlanIterations := max(5, r.cfg.MaxIterations/5)
14✔
768

14✔
769
        // track revision feedback for context in next iteration
14✔
770
        var lastRevisionFeedback string
14✔
771

14✔
772
        for i := 1; i <= maxPlanIterations; i++ {
39✔
773
                select {
25✔
774
                case <-ctx.Done():
1✔
775
                        return fmt.Errorf("plan creation: %w", ctx.Err())
1✔
776
                default:
24✔
777
                }
778

779
                r.log.PrintSection(NewPlanIterationSection(i))
24✔
780

24✔
781
                prompt := r.buildPlanPrompt()
24✔
782
                // append revision feedback context if present
24✔
783
                if lastRevisionFeedback != "" {
25✔
784
                        prompt = fmt.Sprintf("%s\n\n---\nPREVIOUS DRAFT FEEDBACK:\nUser requested revisions with this feedback:\n%s\n\nPlease revise the plan accordingly and present a new PLAN_DRAFT.", prompt, lastRevisionFeedback)
1✔
785
                        lastRevisionFeedback = "" // clear after use
1✔
786
                }
1✔
787

788
                result := r.claude.Run(ctx, prompt)
24✔
789
                if result.Error != nil {
26✔
790
                        if err := r.handlePatternMatchError(result.Error, "claude"); err != nil {
3✔
791
                                return err
1✔
792
                        }
1✔
793
                        return fmt.Errorf("claude execution: %w", result.Error)
1✔
794
                }
795

796
                if result.Signal == SignalFailed {
23✔
797
                        return errors.New("plan creation failed (FAILED signal received)")
1✔
798
                }
1✔
799

800
                // check for PLAN_READY signal
801
                if IsPlanReady(result.Signal) {
27✔
802
                        r.log.Print("plan creation completed")
6✔
803
                        return nil
6✔
804
                }
6✔
805

806
                // check for PLAN_DRAFT signal - present draft for user review
807
                draftResult := r.handlePlanDraft(ctx, result.Output)
15✔
808
                if draftResult.err != nil {
17✔
809
                        return draftResult.err
2✔
810
                }
2✔
811
                if draftResult.handled {
17✔
812
                        lastRevisionFeedback = draftResult.feedback
4✔
813
                        time.Sleep(r.iterationDelay)
4✔
814
                        continue
4✔
815
                }
816

817
                // check for QUESTION signal
818
                handled, err := r.handlePlanQuestion(ctx, result.Output)
9✔
819
                if err != nil {
10✔
820
                        return err
1✔
821
                }
1✔
822
                if handled {
10✔
823
                        time.Sleep(r.iterationDelay)
2✔
824
                        continue
2✔
825
                }
826

827
                // no question, no draft, and no completion - continue
828
                time.Sleep(r.iterationDelay)
6✔
829
        }
830

831
        return fmt.Errorf("max plan iterations (%d) reached without completion", maxPlanIterations)
1✔
832
}
833

834
// handlePatternMatchError checks if err is a PatternMatchError and logs appropriate messages.
835
// Returns the error if it's a pattern match (to trigger graceful exit), nil otherwise.
836
func (r *Runner) handlePatternMatchError(err error, tool string) error {
8✔
837
        var patternErr *executor.PatternMatchError
8✔
838
        if errors.As(err, &patternErr) {
12✔
839
                r.log.Print("error: detected %q in %s output", patternErr.Pattern, tool)
4✔
840
                r.log.Print("run '%s' for more information", patternErr.HelpCmd)
4✔
841
                return err
4✔
842
        }
4✔
843
        return nil
4✔
844
}
845

846
// runFinalize executes the optional finalize step after successful reviews.
847
// runs once, best-effort: failures are logged but don't block success.
848
// exception: context cancellation is propagated (user wants to abort).
849
func (r *Runner) runFinalize(ctx context.Context) error {
17✔
850
        if !r.cfg.FinalizeEnabled {
28✔
851
                return nil
11✔
852
        }
11✔
853

854
        r.log.SetPhase(PhaseFinalize)
6✔
855
        r.log.PrintSection(NewGenericSection("finalize step"))
6✔
856

6✔
857
        prompt := r.replacePromptVariables(r.cfg.AppConfig.FinalizePrompt)
6✔
858
        result := r.claude.Run(ctx, prompt)
6✔
859

6✔
860
        if result.Error != nil {
8✔
861
                // propagate context cancellation - user wants to abort
2✔
862
                if errors.Is(result.Error, context.Canceled) || errors.Is(result.Error, context.DeadlineExceeded) {
3✔
863
                        return fmt.Errorf("finalize step: %w", result.Error)
1✔
864
                }
1✔
865
                // pattern match (rate limit) - log via shared helper, but don't fail (best-effort)
866
                if r.handlePatternMatchError(result.Error, "claude") != nil {
1✔
867
                        return nil //nolint:nilerr // intentional: best-effort semantics, log but don't propagate
×
868
                }
×
869
                // best-effort: log error but don't fail
870
                r.log.Print("finalize step failed: %v", result.Error)
1✔
871
                return nil
1✔
872
        }
873

874
        if result.Signal == SignalFailed {
5✔
875
                r.log.Print("finalize step reported failure (non-blocking)")
1✔
876
                return nil
1✔
877
        }
1✔
878

879
        r.log.Print("finalize step completed")
3✔
880
        return nil
3✔
881
}
882

883
// needsCodexBinary returns true if the current configuration requires the codex binary.
884
// returns false when external_review_tool is "custom" or "none", since codex isn't used.
885
func needsCodexBinary(appConfig *config.Config) bool {
3✔
886
        if appConfig == nil {
3✔
NEW
887
                return true // default behavior assumes codex
×
NEW
888
        }
×
889
        switch appConfig.ExternalReviewTool {
3✔
890
        case "custom", "none":
2✔
891
                return false
2✔
892
        default:
1✔
893
                return true // "codex" or empty (default) requires codex binary
1✔
894
        }
895
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc