• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

umputun / ralphex / 21725861297

05 Feb 2026 07:41PM UTC coverage: 80.861% (+0.4%) from 80.435%
21725861297

push

github

umputun
fix: strengthen codex eval prompt to prevent premature signal

claude was emitting CODEX_REVIEW_DONE after fixing issues instead of
stopping to let codex verify fixes. add explicit instructions that
the signal must only be emitted when codex reports no findings.

4529 of 5601 relevant lines covered (80.86%)

153.52 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

85.77
/pkg/processor/runner.go
1
// Package processor provides the main orchestration loop for ralphex execution.
2
package processor
3

4
import (
5
        "context"
6
        "errors"
7
        "fmt"
8
        "os"
9
        "os/exec"
10
        "strings"
11
        "time"
12

13
        "github.com/umputun/ralphex/pkg/config"
14
        "github.com/umputun/ralphex/pkg/executor"
15
)
16

17
// DefaultIterationDelay is the pause applied between iterations when the
// configuration does not provide a positive IterationDelayMs.
const DefaultIterationDelay = 2 * time.Second
19

20
// Mode names the pipeline variant that Runner.Run dispatches on.
type Mode string

// Execution modes understood by Runner.Run.
const (
	ModeFull      Mode = "full"       // tasks, then reviews, then codex
	ModeReview    Mode = "review"     // no task phase; complete review pipeline
	ModeCodexOnly Mode = "codex-only" // no task phase and no first review; codex loop only
	ModeTasksOnly Mode = "tasks-only" // task phase only; every review is skipped
	ModePlan      Mode = "plan"       // interactive plan creation
)
30

31
// Config holds runner configuration.
32
type Config struct {
33
        PlanFile         string         // path to plan file (required for full mode)
34
        PlanDescription  string         // plan description for interactive plan creation mode
35
        ProgressPath     string         // path to progress file
36
        Mode             Mode           // execution mode
37
        MaxIterations    int            // maximum iterations for task phase
38
        Debug            bool           // enable debug output
39
        NoColor          bool           // disable color output
40
        IterationDelayMs int            // delay between iterations in milliseconds
41
        TaskRetryCount   int            // number of times to retry failed tasks
42
        CodexEnabled     bool           // whether codex review is enabled
43
        FinalizeEnabled  bool           // whether finalize step is enabled
44
        DefaultBranch    string         // default branch name (detected from repo)
45
        AppConfig        *config.Config // full application config (for executors and prompts)
46
}
47

48
//go:generate moq -out mocks/executor.go -pkg mocks -skip-ensure -fmt goimports . Executor
49
//go:generate moq -out mocks/logger.go -pkg mocks -skip-ensure -fmt goimports . Logger
50
//go:generate moq -out mocks/input_collector.go -pkg mocks -skip-ensure -fmt goimports . InputCollector
51

52
// Executor runs CLI commands and returns results.
53
type Executor interface {
54
        Run(ctx context.Context, prompt string) executor.Result
55
}
56

57
// Logger provides logging functionality.
58
type Logger interface {
59
        SetPhase(phase Phase)
60
        Print(format string, args ...any)
61
        PrintRaw(format string, args ...any)
62
        PrintSection(section Section)
63
        PrintAligned(text string)
64
        LogQuestion(question string, options []string)
65
        LogAnswer(answer string)
66
        LogDraftReview(action string, feedback string)
67
        Path() string
68
}
69

70
// InputCollector gathers interactive user input during plan creation:
// answers to questions and a verdict (plus optional feedback) on a plan draft.
type InputCollector interface {
	AskQuestion(ctx context.Context, question string, options []string) (string, error)
	AskDraftReview(ctx context.Context, question string, planContent string) (action string, feedback string, err error)
}
75

76
// Runner orchestrates the execution loop.
77
type Runner struct {
78
        cfg            Config
79
        log            Logger
80
        claude         Executor
81
        codex          Executor
82
        custom         *executor.CustomExecutor
83
        inputCollector InputCollector
84
        iterationDelay time.Duration
85
        taskRetryCount int
86
}
87

88
// New creates a new Runner with the given configuration.
89
// If codex is enabled but the binary is not found in PATH, it is automatically disabled with a warning.
90
func New(cfg Config, log Logger) *Runner {
3✔
91
        // build claude executor with config values
3✔
92
        claudeExec := &executor.ClaudeExecutor{
3✔
93
                OutputHandler: func(text string) {
3✔
94
                        log.PrintAligned(text)
×
95
                },
×
96
                Debug: cfg.Debug,
97
        }
98
        if cfg.AppConfig != nil {
6✔
99
                claudeExec.Command = cfg.AppConfig.ClaudeCommand
3✔
100
                claudeExec.Args = cfg.AppConfig.ClaudeArgs
3✔
101
                claudeExec.ErrorPatterns = cfg.AppConfig.ClaudeErrorPatterns
3✔
102
        }
3✔
103

104
        // build codex executor with config values
105
        codexExec := &executor.CodexExecutor{
3✔
106
                OutputHandler: func(text string) {
3✔
107
                        log.PrintAligned(text)
×
108
                },
×
109
                Debug: cfg.Debug,
110
        }
111
        if cfg.AppConfig != nil {
6✔
112
                codexExec.Command = cfg.AppConfig.CodexCommand
3✔
113
                codexExec.Model = cfg.AppConfig.CodexModel
3✔
114
                codexExec.ReasoningEffort = cfg.AppConfig.CodexReasoningEffort
3✔
115
                codexExec.TimeoutMs = cfg.AppConfig.CodexTimeoutMs
3✔
116
                codexExec.Sandbox = cfg.AppConfig.CodexSandbox
3✔
117
                codexExec.ErrorPatterns = cfg.AppConfig.CodexErrorPatterns
3✔
118
        }
3✔
119

120
        // build custom executor if custom review script is configured
121
        var customExec *executor.CustomExecutor
3✔
122
        if cfg.AppConfig != nil && cfg.AppConfig.CustomReviewScript != "" {
4✔
123
                customExec = &executor.CustomExecutor{
1✔
124
                        Script: cfg.AppConfig.CustomReviewScript,
1✔
125
                        OutputHandler: func(text string) {
1✔
126
                                log.PrintAligned(text)
×
127
                        },
×
128
                        ErrorPatterns: cfg.AppConfig.CodexErrorPatterns, // reuse codex error patterns
129
                }
130
        }
131

132
        // auto-disable codex if the binary is not installed AND we need codex
133
        // (skip this check if using custom external review tool or external review is disabled)
134
        if cfg.CodexEnabled && needsCodexBinary(cfg.AppConfig) {
4✔
135
                codexCmd := codexExec.Command
1✔
136
                if codexCmd == "" {
1✔
137
                        codexCmd = "codex"
×
138
                }
×
139
                if _, err := exec.LookPath(codexCmd); err != nil {
2✔
140
                        log.Print("warning: codex not found (%s: %v), disabling codex review phase", codexCmd, err)
1✔
141
                        cfg.CodexEnabled = false
1✔
142
                }
1✔
143
        }
144

145
        return NewWithExecutors(cfg, log, claudeExec, codexExec, customExec)
3✔
146
}
147

148
// NewWithExecutors creates a new Runner with custom executors (for testing).
149
func NewWithExecutors(cfg Config, log Logger, claude, codex Executor, custom *executor.CustomExecutor) *Runner {
62✔
150
        // determine iteration delay from config or default
62✔
151
        iterDelay := DefaultIterationDelay
62✔
152
        if cfg.IterationDelayMs > 0 {
78✔
153
                iterDelay = time.Duration(cfg.IterationDelayMs) * time.Millisecond
16✔
154
        }
16✔
155

156
        // determine task retry count from config
157
        // appConfig.TaskRetryCountSet means user explicitly set it (even to 0 for no retries)
158
        retryCount := 1
62✔
159
        if cfg.AppConfig != nil && cfg.AppConfig.TaskRetryCountSet {
112✔
160
                retryCount = cfg.TaskRetryCount
50✔
161
        } else if cfg.TaskRetryCount > 0 {
63✔
162
                retryCount = cfg.TaskRetryCount
1✔
163
        }
1✔
164

165
        return &Runner{
62✔
166
                cfg:            cfg,
62✔
167
                log:            log,
62✔
168
                claude:         claude,
62✔
169
                codex:          codex,
62✔
170
                custom:         custom,
62✔
171
                iterationDelay: iterDelay,
62✔
172
                taskRetryCount: retryCount,
62✔
173
        }
62✔
174
}
175

176
// SetInputCollector sets the input collector for plan creation mode.
177
func (r *Runner) SetInputCollector(c InputCollector) {
15✔
178
        r.inputCollector = c
15✔
179
}
15✔
180

181
// Run executes the main loop based on configured mode.
182
func (r *Runner) Run(ctx context.Context) error {
50✔
183
        switch r.cfg.Mode {
50✔
184
        case ModeFull:
13✔
185
                return r.runFull(ctx)
13✔
186
        case ModeReview:
7✔
187
                return r.runReviewOnly(ctx)
7✔
188
        case ModeCodexOnly:
9✔
189
                return r.runCodexOnly(ctx)
9✔
190
        case ModeTasksOnly:
4✔
191
                return r.runTasksOnly(ctx)
4✔
192
        case ModePlan:
16✔
193
                return r.runPlanCreation(ctx)
16✔
194
        default:
1✔
195
                return fmt.Errorf("unknown mode: %s", r.cfg.Mode)
1✔
196
        }
197
}
198

199
// runFull executes the complete pipeline: tasks → review → codex → review.
200
func (r *Runner) runFull(ctx context.Context) error {
13✔
201
        if r.cfg.PlanFile == "" {
14✔
202
                return errors.New("plan file required for full mode")
1✔
203
        }
1✔
204

205
        // phase 1: task execution
206
        r.log.SetPhase(PhaseTask)
12✔
207
        r.log.PrintRaw("starting task execution phase\n")
12✔
208

12✔
209
        if err := r.runTaskPhase(ctx); err != nil {
18✔
210
                return fmt.Errorf("task phase: %w", err)
6✔
211
        }
6✔
212

213
        // phase 2: first review pass - address ALL findings
214
        r.log.SetPhase(PhaseReview)
6✔
215
        r.log.PrintSection(NewGenericSection("claude review 0: all findings"))
6✔
216

6✔
217
        if err := r.runClaudeReview(ctx, r.replacePromptVariables(r.cfg.AppConfig.ReviewFirstPrompt)); err != nil {
6✔
218
                return fmt.Errorf("first review: %w", err)
×
219
        }
×
220

221
        // phase 2.1: claude review loop (critical/major) before codex
222
        if err := r.runClaudeReviewLoop(ctx); err != nil {
6✔
223
                return fmt.Errorf("pre-codex review loop: %w", err)
×
224
        }
×
225

226
        // phase 2.5: codex external review loop
227
        r.log.SetPhase(PhaseCodex)
6✔
228
        r.log.PrintSection(NewGenericSection("codex external review"))
6✔
229

6✔
230
        if err := r.runCodexLoop(ctx); err != nil {
6✔
231
                return fmt.Errorf("codex loop: %w", err)
×
232
        }
×
233

234
        // phase 3: claude review loop (critical/major) after codex
235
        r.log.SetPhase(PhaseReview)
6✔
236

6✔
237
        if err := r.runClaudeReviewLoop(ctx); err != nil {
6✔
238
                return fmt.Errorf("post-codex review loop: %w", err)
×
239
        }
×
240

241
        // optional finalize step (best-effort, but propagates context cancellation)
242
        if err := r.runFinalize(ctx); err != nil {
6✔
243
                return err
×
244
        }
×
245

246
        r.log.Print("all phases completed successfully")
6✔
247
        return nil
6✔
248
}
249

250
// runReviewOnly executes only the review pipeline: review → codex → review.
251
func (r *Runner) runReviewOnly(ctx context.Context) error {
7✔
252
        // phase 1: first review
7✔
253
        r.log.SetPhase(PhaseReview)
7✔
254
        r.log.PrintSection(NewGenericSection("claude review 0: all findings"))
7✔
255

7✔
256
        if err := r.runClaudeReview(ctx, r.replacePromptVariables(r.cfg.AppConfig.ReviewFirstPrompt)); err != nil {
8✔
257
                return fmt.Errorf("first review: %w", err)
1✔
258
        }
1✔
259

260
        // phase 1.1: claude review loop (critical/major) before codex
261
        if err := r.runClaudeReviewLoop(ctx); err != nil {
7✔
262
                return fmt.Errorf("pre-codex review loop: %w", err)
1✔
263
        }
1✔
264

265
        // phase 2: codex external review loop
266
        r.log.SetPhase(PhaseCodex)
5✔
267
        r.log.PrintSection(NewGenericSection("codex external review"))
5✔
268

5✔
269
        if err := r.runCodexLoop(ctx); err != nil {
7✔
270
                return fmt.Errorf("codex loop: %w", err)
2✔
271
        }
2✔
272

273
        // phase 3: claude review loop (critical/major) after codex
274
        r.log.SetPhase(PhaseReview)
3✔
275

3✔
276
        if err := r.runClaudeReviewLoop(ctx); err != nil {
3✔
277
                return fmt.Errorf("post-codex review loop: %w", err)
×
278
        }
×
279

280
        // optional finalize step (best-effort, but propagates context cancellation)
281
        if err := r.runFinalize(ctx); err != nil {
4✔
282
                return err
1✔
283
        }
1✔
284

285
        r.log.Print("review phases completed successfully")
2✔
286
        return nil
2✔
287
}
288

289
// runCodexOnly executes only the codex pipeline: codex → review.
290
func (r *Runner) runCodexOnly(ctx context.Context) error {
9✔
291
        // phase 1: codex external review loop
9✔
292
        r.log.SetPhase(PhaseCodex)
9✔
293
        r.log.PrintSection(NewGenericSection("codex external review"))
9✔
294

9✔
295
        if err := r.runCodexLoop(ctx); err != nil {
10✔
296
                return fmt.Errorf("codex loop: %w", err)
1✔
297
        }
1✔
298

299
        // phase 2: claude review loop (critical/major) after codex
300
        r.log.SetPhase(PhaseReview)
8✔
301

8✔
302
        if err := r.runClaudeReviewLoop(ctx); err != nil {
8✔
303
                return fmt.Errorf("post-codex review loop: %w", err)
×
304
        }
×
305

306
        // optional finalize step (best-effort, but propagates context cancellation)
307
        if err := r.runFinalize(ctx); err != nil {
8✔
308
                return err
×
309
        }
×
310

311
        r.log.Print("codex phases completed successfully")
8✔
312
        return nil
8✔
313
}
314

315
// runTasksOnly executes only task phase, skipping all reviews.
316
func (r *Runner) runTasksOnly(ctx context.Context) error {
4✔
317
        if r.cfg.PlanFile == "" {
5✔
318
                return errors.New("plan file required for tasks-only mode")
1✔
319
        }
1✔
320

321
        r.log.SetPhase(PhaseTask)
3✔
322
        r.log.PrintRaw("starting task execution phase\n")
3✔
323

3✔
324
        if err := r.runTaskPhase(ctx); err != nil {
4✔
325
                return fmt.Errorf("task phase: %w", err)
1✔
326
        }
1✔
327

328
        r.log.Print("task execution completed successfully")
2✔
329
        return nil
2✔
330
}
331

332
// runTaskPhase executes tasks until completion or max iterations.
333
// executes ONE Task section per iteration.
334
func (r *Runner) runTaskPhase(ctx context.Context) error {
15✔
335
        prompt := r.replacePromptVariables(r.cfg.AppConfig.TaskPrompt)
15✔
336
        retryCount := 0
15✔
337

15✔
338
        for i := 1; i <= r.cfg.MaxIterations; i++ {
34✔
339
                select {
19✔
340
                case <-ctx.Done():
1✔
341
                        return fmt.Errorf("task phase: %w", ctx.Err())
1✔
342
                default:
18✔
343
                }
344

345
                r.log.PrintSection(NewTaskIterationSection(i))
18✔
346

18✔
347
                result := r.claude.Run(ctx, prompt)
18✔
348
                if result.Error != nil {
20✔
349
                        if err := r.handlePatternMatchError(result.Error, "claude"); err != nil {
3✔
350
                                return err
1✔
351
                        }
1✔
352
                        return fmt.Errorf("claude execution: %w", result.Error)
1✔
353
                }
354

355
                if result.Signal == SignalCompleted {
24✔
356
                        // verify plan actually has no uncompleted checkboxes
8✔
357
                        if r.hasUncompletedTasks() {
8✔
358
                                r.log.Print("warning: completion signal received but plan still has [ ] items, continuing...")
×
359
                                continue
×
360
                        }
361
                        r.log.PrintRaw("\nall tasks completed, starting code review...\n")
8✔
362
                        return nil
8✔
363
                }
364

365
                if result.Signal == SignalFailed {
13✔
366
                        if retryCount < r.taskRetryCount {
7✔
367
                                r.log.Print("task failed, retrying...")
2✔
368
                                retryCount++
2✔
369
                                time.Sleep(r.iterationDelay)
2✔
370
                                continue
2✔
371
                        }
372
                        return errors.New("task execution failed after retry (FAILED signal received)")
3✔
373
                }
374

375
                retryCount = 0
3✔
376
                // continue with same prompt - it reads from plan file each time
3✔
377
                time.Sleep(r.iterationDelay)
3✔
378
        }
379

380
        return fmt.Errorf("max iterations (%d) reached without completion", r.cfg.MaxIterations)
1✔
381
}
382

383
// runClaudeReview runs Claude review with the given prompt until REVIEW_DONE.
384
func (r *Runner) runClaudeReview(ctx context.Context, prompt string) error {
13✔
385
        result := r.claude.Run(ctx, prompt)
13✔
386
        if result.Error != nil {
13✔
387
                if err := r.handlePatternMatchError(result.Error, "claude"); err != nil {
×
388
                        return err
×
389
                }
×
390
                return fmt.Errorf("claude execution: %w", result.Error)
×
391
        }
392

393
        if result.Signal == SignalFailed {
14✔
394
                return errors.New("review failed (FAILED signal received)")
1✔
395
        }
1✔
396

397
        if !IsReviewDone(result.Signal) {
12✔
398
                r.log.Print("warning: first review pass did not complete cleanly, continuing...")
×
399
        }
×
400

401
        return nil
12✔
402
}
403

404
// runClaudeReviewLoop runs claude review iterations using second review prompt.
405
func (r *Runner) runClaudeReviewLoop(ctx context.Context) error {
29✔
406
        // review iterations = 10% of max_iterations (min 3)
29✔
407
        maxReviewIterations := max(3, r.cfg.MaxIterations/10)
29✔
408

29✔
409
        for i := 1; i <= maxReviewIterations; i++ {
58✔
410
                select {
29✔
411
                case <-ctx.Done():
×
412
                        return fmt.Errorf("review: %w", ctx.Err())
×
413
                default:
29✔
414
                }
415

416
                r.log.PrintSection(NewClaudeReviewSection(i, ": critical/major"))
29✔
417

29✔
418
                result := r.claude.Run(ctx, r.replacePromptVariables(r.cfg.AppConfig.ReviewSecondPrompt))
29✔
419
                if result.Error != nil {
30✔
420
                        if err := r.handlePatternMatchError(result.Error, "claude"); err != nil {
2✔
421
                                return err
1✔
422
                        }
1✔
423
                        return fmt.Errorf("claude execution: %w", result.Error)
×
424
                }
425

426
                if result.Signal == SignalFailed {
28✔
427
                        return errors.New("review failed (FAILED signal received)")
×
428
                }
×
429

430
                if IsReviewDone(result.Signal) {
56✔
431
                        r.log.Print("claude review complete - no more findings")
28✔
432
                        return nil
28✔
433
                }
28✔
434

435
                r.log.Print("issues fixed, running another review iteration...")
×
436
                time.Sleep(r.iterationDelay)
×
437
        }
438

439
        r.log.Print("max claude review iterations reached, continuing...")
×
440
        return nil
×
441
}
442

443
// externalReviewTool returns the effective external review tool to use.
444
// handles backward compatibility: codex_enabled = false → "none"
445
// the CodexEnabled flag takes precedence for backward compatibility.
446
func (r *Runner) externalReviewTool() string {
20✔
447
        // backward compatibility: codex_enabled = false means no external review
20✔
448
        // this takes precedence over external_review_tool setting
20✔
449
        if !r.cfg.CodexEnabled {
29✔
450
                return "none"
9✔
451
        }
9✔
452

453
        // check explicit external_review_tool setting
454
        if r.cfg.AppConfig != nil && r.cfg.AppConfig.ExternalReviewTool != "" {
22✔
455
                return r.cfg.AppConfig.ExternalReviewTool
11✔
456
        }
11✔
457

458
        // default to codex
459
        return "codex"
×
460
}
461

462
// runCodexLoop runs the external review loop (codex or custom) until no findings.
463
func (r *Runner) runCodexLoop(ctx context.Context) error {
20✔
464
        tool := r.externalReviewTool()
20✔
465

20✔
466
        // skip external review phase if disabled
20✔
467
        if tool == "none" {
30✔
468
                r.log.Print("external review disabled, skipping...")
10✔
469
                return nil
10✔
470
        }
10✔
471

472
        // custom review tool
473
        if tool == "custom" {
12✔
474
                if r.custom == nil {
3✔
475
                        return errors.New("custom review script not configured")
1✔
476
                }
1✔
477
                return r.runExternalReviewLoop(ctx, externalReviewConfig{
1✔
478
                        name:           "custom",
1✔
479
                        runReview:      func(ctx context.Context, prompt string) executor.Result { return r.custom.Run(ctx, prompt) },
2✔
480
                        buildPrompt:    r.buildCustomReviewPrompt,
481
                        buildEvalPrompt: r.buildCustomEvaluationPrompt,
482
                        showSummary:    r.showCustomSummary,
483
                        makeSection:    NewCustomIterationSection,
484
                })
485
        }
486

487
        // default: codex review
488
        return r.runExternalReviewLoop(ctx, externalReviewConfig{
8✔
489
                name:           "codex",
8✔
490
                runReview:      r.codex.Run,
8✔
491
                buildPrompt:    r.buildCodexPrompt,
8✔
492
                buildEvalPrompt: r.buildCodexEvaluationPrompt,
8✔
493
                showSummary:    r.showCodexSummary,
8✔
494
                makeSection:    NewCodexIterationSection,
8✔
495
        })
8✔
496
}
497

498
// externalReviewConfig holds callbacks for running an external review tool.
499
type externalReviewConfig struct {
500
        name           string                                                     // tool name for error messages
501
        runReview      func(ctx context.Context, prompt string) executor.Result   // run the external review tool
502
        buildPrompt    func(isFirst bool, claudeResponse string) string           // build prompt for review tool
503
        buildEvalPrompt func(output string) string                                // build evaluation prompt for claude
504
        showSummary    func(output string)                                        // display review findings summary
505
        makeSection    func(iteration int) Section                                // create section header
506
}
507

508
// runExternalReviewLoop runs a generic external review tool-claude loop until no findings.
509
func (r *Runner) runExternalReviewLoop(ctx context.Context, cfg externalReviewConfig) error {
9✔
510
        // iterations = 20% of max_iterations (min 3)
9✔
511
        maxIterations := max(3, r.cfg.MaxIterations/5)
9✔
512

9✔
513
        var claudeResponse string // first iteration has no prior response
9✔
514

9✔
515
        for i := 1; i <= maxIterations; i++ {
18✔
516
                select {
9✔
517
                case <-ctx.Done():
×
518
                        return fmt.Errorf("%s loop: %w", cfg.name, ctx.Err())
×
519
                default:
9✔
520
                }
521

522
                r.log.PrintSection(cfg.makeSection(i))
9✔
523

9✔
524
                // run external review tool
9✔
525
                reviewResult := cfg.runReview(ctx, cfg.buildPrompt(i == 1, claudeResponse))
9✔
526
                if reviewResult.Error != nil {
11✔
527
                        if err := r.handlePatternMatchError(reviewResult.Error, cfg.name); err != nil {
3✔
528
                                return err
1✔
529
                        }
1✔
530
                        return fmt.Errorf("%s execution: %w", cfg.name, reviewResult.Error)
1✔
531
                }
532

533
                if reviewResult.Output == "" {
9✔
534
                        r.log.Print("%s review returned no output, skipping...", cfg.name)
2✔
535
                        break
2✔
536
                }
537

538
                // show findings summary before Claude evaluation
539
                cfg.showSummary(reviewResult.Output)
5✔
540

5✔
541
                // pass output to claude for evaluation and fixing
5✔
542
                r.log.SetPhase(PhaseClaudeEval)
5✔
543
                r.log.PrintSection(NewClaudeEvalSection())
5✔
544
                claudeResult := r.claude.Run(ctx, cfg.buildEvalPrompt(reviewResult.Output))
5✔
545

5✔
546
                // restore codex phase for next iteration
5✔
547
                r.log.SetPhase(PhaseCodex)
5✔
548
                if claudeResult.Error != nil {
5✔
549
                        if err := r.handlePatternMatchError(claudeResult.Error, "claude"); err != nil {
×
550
                                return err
×
551
                        }
×
552
                        return fmt.Errorf("claude execution: %w", claudeResult.Error)
×
553
                }
554

555
                claudeResponse = claudeResult.Output
5✔
556

5✔
557
                // exit only when claude sees "no findings"
5✔
558
                if IsCodexDone(claudeResult.Signal) {
10✔
559
                        r.log.Print("%s review complete - no more findings", cfg.name)
5✔
560
                        return nil
5✔
561
                }
5✔
562

563
                time.Sleep(r.iterationDelay)
×
564
        }
565

566
        r.log.Print("max %s iterations reached, continuing to next phase...", cfg.name)
2✔
567
        return nil
2✔
568
}
569

570
// buildCodexPrompt creates the prompt for codex review.
571
func (r *Runner) buildCodexPrompt(isFirst bool, claudeResponse string) string {
9✔
572
        // build plan context if available
9✔
573
        planContext := ""
9✔
574
        if r.cfg.PlanFile != "" {
12✔
575
                planContext = fmt.Sprintf(`
3✔
576
## Plan Context
3✔
577
The code implements the plan at: %s
3✔
578

3✔
579
---
3✔
580
`, r.resolvePlanFilePath())
3✔
581
        }
3✔
582

583
        // different diff command based on iteration
584
        var diffInstruction, diffDescription string
9✔
585
        if isFirst {
18✔
586
                defaultBranch := r.getDefaultBranch()
9✔
587
                diffInstruction = fmt.Sprintf("Run: git diff %s...HEAD", defaultBranch)
9✔
588
                diffDescription = fmt.Sprintf("code changes between %s and HEAD branch", defaultBranch)
9✔
589
        } else {
9✔
590
                diffInstruction = "Run: git diff"
×
591
                diffDescription = "uncommitted changes (Claude's fixes from previous iteration)"
×
592
        }
×
593

594
        basePrompt := fmt.Sprintf(`%sReview the %s.
9✔
595

9✔
596
%s
9✔
597

9✔
598
Analyze for:
9✔
599
- Bugs and logic errors
9✔
600
- Security vulnerabilities
9✔
601
- Race conditions
9✔
602
- Error handling gaps
9✔
603
- Code quality issues
9✔
604

9✔
605
Report findings with file:line references. If no issues found, say "NO ISSUES FOUND".`, planContext, diffDescription, diffInstruction)
9✔
606

9✔
607
        if claudeResponse != "" {
9✔
608
                return fmt.Sprintf(`%s
×
609

×
610
---
×
611
PREVIOUS REVIEW CONTEXT:
×
612
Claude (previous reviewer) responded to your findings:
×
613

×
614
%s
×
615

×
616
Re-evaluate considering Claude's arguments. If Claude's fixes are correct, acknowledge them.
×
617
If Claude's arguments are invalid, explain why the issues still exist.`, basePrompt, claudeResponse)
×
618
        }
×
619

620
        return basePrompt
9✔
621
}
622

623
// hasUncompletedTasks checks if plan file has any uncompleted checkboxes.
624
func (r *Runner) hasUncompletedTasks() bool {
13✔
625
        content, err := os.ReadFile(r.resolvePlanFilePath())
13✔
626
        if err != nil {
13✔
627
                return true // assume incomplete if can't read
×
628
        }
×
629

630
        // look for uncompleted checkbox pattern: [ ] (not [x])
631
        for line := range strings.SplitSeq(string(content), "\n") {
43✔
632
                trimmed := strings.TrimSpace(line)
30✔
633
                if strings.HasPrefix(trimmed, "- [ ]") {
33✔
634
                        return true
3✔
635
                }
3✔
636
        }
637
        return false
10✔
638
}
639

640
// showCodexSummary displays a condensed summary of codex output before Claude evaluation.
641
// extracts text until first code block or 500 chars, whichever is shorter.
642
func (r *Runner) showCodexSummary(output string) {
4✔
643
        r.showExternalReviewSummary("codex", output)
4✔
644
}
4✔
645

646
// showCustomSummary displays a condensed summary of custom review output before Claude evaluation.
647
func (r *Runner) showCustomSummary(output string) {
1✔
648
        r.showExternalReviewSummary("custom", output)
1✔
649
}
1✔
650

651
// showExternalReviewSummary displays a condensed summary of external review output.
652
// extracts text until first code block or 5000 chars, whichever is shorter.
653
func (r *Runner) showExternalReviewSummary(toolName, output string) {
5✔
654
        summary := output
5✔
655

5✔
656
        // trim to first code block if present
5✔
657
        if idx := strings.Index(summary, "```"); idx > 0 {
5✔
658
                summary = summary[:idx]
×
659
        }
×
660

661
        // limit to 5000 chars
662
        if len(summary) > 5000 {
5✔
663
                summary = summary[:5000] + "..."
×
664
        }
×
665

666
        summary = strings.TrimSpace(summary)
5✔
667
        if summary == "" {
5✔
668
                return
×
669
        }
×
670

671
        r.log.Print("%s findings:", toolName)
5✔
672
        for line := range strings.SplitSeq(summary, "\n") {
10✔
673
                if strings.TrimSpace(line) == "" {
5✔
674
                        continue
×
675
                }
676
                r.log.PrintAligned("  " + line)
5✔
677
        }
678
}
679

680
// ErrUserRejectedPlan is the sentinel error reported when the user answers a
// plan draft review with "reject".
var ErrUserRejectedPlan = errors.New("user rejected plan")
682

683
// draftReviewResult is the outcome of handlePlanDraft.
type draftReviewResult struct {
	handled  bool   // a plan draft was present in the output and was processed
	feedback string // user feedback for a revision; set only for the "revise" action
	err      error  // failure while collecting the review, or ErrUserRejectedPlan
}
689

690
// handlePlanDraft processes PLAN_DRAFT signal if present in output.
691
// returns result indicating whether draft was handled and any feedback/errors.
692
func (r *Runner) handlePlanDraft(ctx context.Context, output string) draftReviewResult {
15✔
693
        planContent, draftErr := ParsePlanDraftPayload(output)
15✔
694
        if draftErr != nil {
24✔
695
                // log malformed signals (but not "no signal" which is expected)
9✔
696
                if !errors.Is(draftErr, ErrNoPlanDraftSignal) {
10✔
697
                        r.log.Print("warning: %v", draftErr)
1✔
698
                }
1✔
699
                return draftReviewResult{handled: false}
9✔
700
        }
701

702
        r.log.Print("plan draft ready for review")
6✔
703

6✔
704
        action, feedback, askErr := r.inputCollector.AskDraftReview(ctx, "Review the plan draft", planContent)
6✔
705
        if askErr != nil {
7✔
706
                return draftReviewResult{handled: true, err: fmt.Errorf("collect draft review: %w", askErr)}
1✔
707
        }
1✔
708

709
        // log the draft review action and feedback to progress file
710
        r.log.LogDraftReview(action, feedback)
5✔
711

5✔
712
        switch action {
5✔
713
        case "accept":
3✔
714
                r.log.Print("draft accepted, continuing to write plan file...")
3✔
715
                return draftReviewResult{handled: true}
3✔
716
        case "revise":
1✔
717
                r.log.Print("revision requested, re-running with feedback...")
1✔
718
                return draftReviewResult{handled: true, feedback: feedback}
1✔
719
        case "reject":
1✔
720
                r.log.Print("plan rejected by user")
1✔
721
                return draftReviewResult{handled: true, err: ErrUserRejectedPlan}
1✔
722
        }
723

724
        return draftReviewResult{handled: true}
×
725
}
726

727
// handlePlanQuestion processes QUESTION signal if present in output.
728
// returns true if question was found and handled, false otherwise.
729
// returns error if question handling failed.
730
func (r *Runner) handlePlanQuestion(ctx context.Context, output string) (bool, error) {
9✔
731
        question, err := ParseQuestionPayload(output)
9✔
732
        if err != nil {
15✔
733
                // log malformed signals (but not "no signal" which is expected)
6✔
734
                if !errors.Is(err, ErrNoQuestionSignal) {
6✔
735
                        r.log.Print("warning: %v", err)
×
736
                }
×
737
                return false, nil
6✔
738
        }
739

740
        r.log.LogQuestion(question.Question, question.Options)
3✔
741

3✔
742
        answer, askErr := r.inputCollector.AskQuestion(ctx, question.Question, question.Options)
3✔
743
        if askErr != nil {
4✔
744
                return true, fmt.Errorf("collect answer: %w", askErr)
1✔
745
        }
1✔
746

747
        r.log.LogAnswer(answer)
2✔
748
        return true, nil
2✔
749
}
750

751
// runPlanCreation executes the interactive plan creation loop.
752
// the loop continues until PLAN_READY signal or max iterations reached.
753
// handles QUESTION signals for Q&A and PLAN_DRAFT signals for draft review.
754
func (r *Runner) runPlanCreation(ctx context.Context) error {
16✔
755
        if r.cfg.PlanDescription == "" {
17✔
756
                return errors.New("plan description required for plan mode")
1✔
757
        }
1✔
758
        if r.inputCollector == nil {
16✔
759
                return errors.New("input collector required for plan mode")
1✔
760
        }
1✔
761

762
        r.log.SetPhase(PhasePlan)
14✔
763
        r.log.PrintRaw("starting interactive plan creation\n")
14✔
764
        r.log.Print("plan request: %s", r.cfg.PlanDescription)
14✔
765

14✔
766
        // plan iterations use 20% of max_iterations (min 5)
14✔
767
        maxPlanIterations := max(5, r.cfg.MaxIterations/5)
14✔
768

14✔
769
        // track revision feedback for context in next iteration
14✔
770
        var lastRevisionFeedback string
14✔
771

14✔
772
        for i := 1; i <= maxPlanIterations; i++ {
39✔
773
                select {
25✔
774
                case <-ctx.Done():
1✔
775
                        return fmt.Errorf("plan creation: %w", ctx.Err())
1✔
776
                default:
24✔
777
                }
778

779
                r.log.PrintSection(NewPlanIterationSection(i))
24✔
780

24✔
781
                prompt := r.buildPlanPrompt()
24✔
782
                // append revision feedback context if present
24✔
783
                if lastRevisionFeedback != "" {
25✔
784
                        prompt = fmt.Sprintf("%s\n\n---\nPREVIOUS DRAFT FEEDBACK:\nUser requested revisions with this feedback:\n%s\n\nPlease revise the plan accordingly and present a new PLAN_DRAFT.", prompt, lastRevisionFeedback)
1✔
785
                        lastRevisionFeedback = "" // clear after use
1✔
786
                }
1✔
787

788
                result := r.claude.Run(ctx, prompt)
24✔
789
                if result.Error != nil {
26✔
790
                        if err := r.handlePatternMatchError(result.Error, "claude"); err != nil {
3✔
791
                                return err
1✔
792
                        }
1✔
793
                        return fmt.Errorf("claude execution: %w", result.Error)
1✔
794
                }
795

796
                if result.Signal == SignalFailed {
23✔
797
                        return errors.New("plan creation failed (FAILED signal received)")
1✔
798
                }
1✔
799

800
                // check for PLAN_READY signal
801
                if IsPlanReady(result.Signal) {
27✔
802
                        r.log.Print("plan creation completed")
6✔
803
                        return nil
6✔
804
                }
6✔
805

806
                // check for PLAN_DRAFT signal - present draft for user review
807
                draftResult := r.handlePlanDraft(ctx, result.Output)
15✔
808
                if draftResult.err != nil {
17✔
809
                        return draftResult.err
2✔
810
                }
2✔
811
                if draftResult.handled {
17✔
812
                        lastRevisionFeedback = draftResult.feedback
4✔
813
                        time.Sleep(r.iterationDelay)
4✔
814
                        continue
4✔
815
                }
816

817
                // check for QUESTION signal
818
                handled, err := r.handlePlanQuestion(ctx, result.Output)
9✔
819
                if err != nil {
10✔
820
                        return err
1✔
821
                }
1✔
822
                if handled {
10✔
823
                        time.Sleep(r.iterationDelay)
2✔
824
                        continue
2✔
825
                }
826

827
                // no question, no draft, and no completion - continue
828
                time.Sleep(r.iterationDelay)
6✔
829
        }
830

831
        return fmt.Errorf("max plan iterations (%d) reached without completion", maxPlanIterations)
1✔
832
}
833

834
// handlePatternMatchError checks if err is a PatternMatchError and logs appropriate messages.
835
// Returns the error if it's a pattern match (to trigger graceful exit), nil otherwise.
836
func (r *Runner) handlePatternMatchError(err error, tool string) error {
8✔
837
        var patternErr *executor.PatternMatchError
8✔
838
        if errors.As(err, &patternErr) {
12✔
839
                r.log.Print("error: detected %q in %s output", patternErr.Pattern, tool)
4✔
840
                r.log.Print("run '%s' for more information", patternErr.HelpCmd)
4✔
841
                return err
4✔
842
        }
4✔
843
        return nil
4✔
844
}
845

846
// runFinalize executes the optional finalize step after successful reviews.
847
// runs once, best-effort: failures are logged but don't block success.
848
// exception: context cancellation is propagated (user wants to abort).
849
func (r *Runner) runFinalize(ctx context.Context) error {
17✔
850
        if !r.cfg.FinalizeEnabled {
28✔
851
                return nil
11✔
852
        }
11✔
853

854
        r.log.SetPhase(PhaseFinalize)
6✔
855
        r.log.PrintSection(NewGenericSection("finalize step"))
6✔
856

6✔
857
        prompt := r.replacePromptVariables(r.cfg.AppConfig.FinalizePrompt)
6✔
858
        result := r.claude.Run(ctx, prompt)
6✔
859

6✔
860
        if result.Error != nil {
8✔
861
                // propagate context cancellation - user wants to abort
2✔
862
                if errors.Is(result.Error, context.Canceled) || errors.Is(result.Error, context.DeadlineExceeded) {
3✔
863
                        return fmt.Errorf("finalize step: %w", result.Error)
1✔
864
                }
1✔
865
                // pattern match (rate limit) - log via shared helper, but don't fail (best-effort)
866
                if r.handlePatternMatchError(result.Error, "claude") != nil {
1✔
867
                        return nil //nolint:nilerr // intentional: best-effort semantics, log but don't propagate
×
868
                }
×
869
                // best-effort: log error but don't fail
870
                r.log.Print("finalize step failed: %v", result.Error)
1✔
871
                return nil
1✔
872
        }
873

874
        if result.Signal == SignalFailed {
5✔
875
                r.log.Print("finalize step reported failure (non-blocking)")
1✔
876
                return nil
1✔
877
        }
1✔
878

879
        r.log.Print("finalize step completed")
3✔
880
        return nil
3✔
881
}
882

883
// needsCodexBinary returns true if the current configuration requires the codex binary.
884
// returns false when external_review_tool is "custom" or "none", since codex isn't used.
885
func needsCodexBinary(appConfig *config.Config) bool {
3✔
886
        if appConfig == nil {
3✔
887
                return true // default behavior assumes codex
×
888
        }
×
889
        switch appConfig.ExternalReviewTool {
3✔
890
        case "custom", "none":
2✔
891
                return false
2✔
892
        default:
1✔
893
                return true // "codex" or empty (default) requires codex binary
1✔
894
        }
895
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc