• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

umputun / ralphex / 24009605900

05 Apr 2026 08:15PM UTC coverage: 81.809% (-0.1%) from 81.915%
24009605900

Pull #272

github

dwilberger
feat: add per-phase Claude model configuration

Add claude_model and review_model config parameters to control which
Claude model is used for task execution vs review phases. This allows
using a more capable model (e.g., opus) for tasks while using a faster
model (e.g., sonnet) for review iterations, reducing cost on
review-heavy runs.

- Add Model field to ClaudeExecutor, injecting --model flag when set
- Create separate review executor when review_model differs
- Add --claude-model and --review-model CLI flags
- Empty values preserve current behavior (no --model flag)

Closes #271
Pull Request #272: feat: add per-phase Claude model configuration (claude_model + review_model)

42 of 59 new or added lines in 5 files covered. (71.19%)

3 existing lines in 1 file now uncovered.

6368 of 7784 relevant lines covered (81.81%)

208.75 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

87.56
/pkg/processor/runner.go
1
// Package processor provides the main orchestration loop for ralphex execution.
2
package processor
3

4
import (
5
        "context"
6
        "errors"
7
        "fmt"
8
        "os/exec"
9
        "strings"
10
        "time"
11

12
        "github.com/umputun/ralphex/pkg/config"
13
        "github.com/umputun/ralphex/pkg/executor"
14
        "github.com/umputun/ralphex/pkg/plan"
15
        "github.com/umputun/ralphex/pkg/status"
16
)
17

18
// DefaultIterationDelay is the pause applied between iterations when the
// configuration does not supply a positive IterationDelayMs.
const DefaultIterationDelay = 2 * time.Second

// iteration budgets: each phase gets max_iterations divided by its divisor,
// with the matching minimum acting as a floor.
const (
	minReviewIterations    = 3    // floor for claude review iterations
	reviewIterationDivisor = 10   // claude review iterations = max_iterations / divisor
	minCodexIterations     = 3    // floor for codex review iterations
	codexIterationDivisor  = 5    // codex iterations = max_iterations / divisor
	minPlanIterations      = 5    // floor for plan creation iterations
	planIterationDivisor   = 5    // plan iterations = max_iterations / divisor
	maxCodexSummaryLen     = 5000 // max chars kept from codex output summary
)
30

31
// Mode selects which pipeline Runner.Run executes.
type Mode string

// supported execution modes; any other value makes Run return an "unknown mode" error.
const (
	ModeFull      Mode = "full"       // tasks, then reviews, then codex
	ModeReview    Mode = "review"     // no tasks; full review pipeline only
	ModeCodexOnly Mode = "codex-only" // no tasks and no first review; codex loop onwards
	ModeTasksOnly Mode = "tasks-only" // task phase only, all reviews skipped
	ModePlan      Mode = "plan"       // interactive plan creation
)
41

42
// Config holds runner configuration.
43
type Config struct {
44
        PlanFile              string         // path to plan file (required for full mode)
45
        PlanDescription       string         // plan description for interactive plan creation mode
46
        ProgressPath          string         // path to progress file
47
        Mode                  Mode           // execution mode
48
        MaxIterations         int            // maximum iterations for task phase
49
        MaxExternalIterations int            // override external review iteration limit (0 = auto)
50
        ReviewPatience        int            // terminate external review after N unchanged rounds (0 = disabled)
51
        Debug                 bool           // enable debug output
52
        NoColor               bool           // disable color output
53
        IterationDelayMs      int            // delay between iterations in milliseconds
54
        TaskRetryCount        int            // number of times to retry failed tasks
55
        ClaudeModel           string         // model for task execution (empty = CLI default)
56
        ReviewModel           string         // model for review phases (empty = falls back to ClaudeModel)
57
        CodexEnabled          bool           // whether codex review is enabled
58
        FinalizeEnabled       bool           // whether finalize step is enabled
59
        DefaultBranch         string         // default branch name (detected from repo)
60
        AppConfig             *config.Config // full application config (for executors and prompts)
61
}
62

63
//go:generate moq -out mocks/executor.go -pkg mocks -skip-ensure -fmt goimports . Executor
64
//go:generate moq -out mocks/logger.go -pkg mocks -skip-ensure -fmt goimports . Logger
65
//go:generate moq -out mocks/input_collector.go -pkg mocks -skip-ensure -fmt goimports . InputCollector
66
//go:generate moq -out mocks/git_checker.go -pkg mocks -skip-ensure -fmt goimports . GitChecker
67

68
// Executor runs CLI commands and returns results.
69
type Executor interface {
70
        Run(ctx context.Context, prompt string) executor.Result
71
}
72

73
// Logger provides logging functionality.
74
type Logger interface {
75
        Print(format string, args ...any)
76
        PrintRaw(format string, args ...any)
77
        PrintSection(section status.Section)
78
        PrintAligned(text string)
79
        LogQuestion(question string, options []string)
80
        LogAnswer(answer string)
81
        LogDraftReview(action string, feedback string)
82
        Path() string
83
}
84

85
// InputCollector gathers interactive answers from the user while a plan is being created.
type InputCollector interface {
	AskQuestion(ctx context.Context, question string, options []string) (string, error)
	AskDraftReview(ctx context.Context, question, planContent string) (action, feedback string, err error)
}
90

91
// GitChecker exposes the git state the review loops compare between rounds:
// the HEAD commit hash and a fingerprint of the current diff.
type GitChecker interface {
	HeadHash() (string, error)
	DiffFingerprint() (string, error)
}
96

97
// Executors groups the executor dependencies for the Runner.
98
type Executors struct {
99
        Claude       Executor
100
        ReviewClaude Executor                // optional: separate executor for review phases (nil = use Claude)
101
        Codex        Executor
102
        Custom       *executor.CustomExecutor
103
}
104

105
// Runner orchestrates the execution loop.
106
type Runner struct {
107
        cfg                 Config
108
        log                 Logger
109
        claude              Executor // executor for task phase
110
        reviewClaude        Executor // executor for review phases (may differ in model)
111
        codex               Executor
112
        custom              *executor.CustomExecutor
113
        git                 GitChecker
114
        inputCollector      InputCollector
115
        phaseHolder         *status.PhaseHolder
116
        iterationDelay      time.Duration
117
        taskRetryCount      int
118
        waitOnLimit         time.Duration
119
        breakCh             <-chan struct{}                 // nil = feature disabled; receives one value per break signal
120
        pauseHandler        func(ctx context.Context) bool  // called on break during task phase; true = resume, false = abort
121
        lastSessionTimedOut bool                            // set by runWithSessionTimeout, checked by review loops
122
        taskPhaseOverride   func(ctx context.Context) error // test seam: override runTaskPhase result (nil = normal execution)
123
}
124

125
// New creates a new Runner with the given configuration and shared phase holder.
126
// If codex is enabled but the binary is not found in PATH, it is automatically disabled with a warning.
127
func New(cfg Config, log Logger, holder *status.PhaseHolder) *Runner {
3✔
128
        // build claude executor with config values
3✔
129
        claudeExec := &executor.ClaudeExecutor{
3✔
130
                OutputHandler: func(text string) {
3✔
131
                        log.PrintAligned(text)
×
132
                },
×
133
                Debug: cfg.Debug,
134
        }
135
        if cfg.AppConfig != nil {
6✔
136
                claudeExec.Command = cfg.AppConfig.ClaudeCommand
3✔
137
                claudeExec.Args = cfg.AppConfig.ClaudeArgs
3✔
138
                claudeExec.ErrorPatterns = cfg.AppConfig.ClaudeErrorPatterns
3✔
139
                claudeExec.LimitPatterns = cfg.AppConfig.ClaudeLimitPatterns
3✔
140
                claudeExec.IdleTimeout = cfg.AppConfig.IdleTimeout
3✔
141
        }
3✔
142
        claudeExec.Model = cfg.ClaudeModel
3✔
143

3✔
144
        // build review executor (shares base config, may use a different model)
3✔
145
        reviewModel := cfg.ReviewModel
3✔
146
        if reviewModel == "" {
6✔
147
                reviewModel = cfg.ClaudeModel // fall back to task model
3✔
148
        }
3✔
149
        var reviewExec Executor
3✔
150
        if reviewModel != cfg.ClaudeModel {
3✔
NEW
151
                re := &executor.ClaudeExecutor{
×
NEW
152
                        OutputHandler: claudeExec.OutputHandler,
×
NEW
153
                        Debug:         cfg.Debug,
×
NEW
154
                        Model:         reviewModel,
×
NEW
155
                }
×
NEW
156
                if cfg.AppConfig != nil {
×
NEW
157
                        re.Command = cfg.AppConfig.ClaudeCommand
×
NEW
158
                        re.Args = cfg.AppConfig.ClaudeArgs
×
NEW
159
                        re.ErrorPatterns = cfg.AppConfig.ClaudeErrorPatterns
×
NEW
160
                        re.LimitPatterns = cfg.AppConfig.ClaudeLimitPatterns
×
NEW
161
                        re.IdleTimeout = cfg.AppConfig.IdleTimeout
×
NEW
162
                }
×
NEW
163
                reviewExec = re
×
164
        }
165

166
        // build codex executor with config values
167
        codexExec := &executor.CodexExecutor{
3✔
168
                OutputHandler: func(text string) {
3✔
169
                        log.PrintAligned(text)
×
170
                },
×
171
                Debug: cfg.Debug,
172
        }
173
        if cfg.AppConfig != nil {
6✔
174
                codexExec.Command = cfg.AppConfig.CodexCommand
3✔
175
                codexExec.Model = cfg.AppConfig.CodexModel
3✔
176
                codexExec.ReasoningEffort = cfg.AppConfig.CodexReasoningEffort
3✔
177
                codexExec.TimeoutMs = cfg.AppConfig.CodexTimeoutMs
3✔
178
                codexExec.Sandbox = cfg.AppConfig.CodexSandbox
3✔
179
                codexExec.ErrorPatterns = cfg.AppConfig.CodexErrorPatterns
3✔
180
                codexExec.LimitPatterns = cfg.AppConfig.CodexLimitPatterns
3✔
181
        }
3✔
182

183
        // build custom executor if custom review script is configured
184
        var customExec *executor.CustomExecutor
3✔
185
        if cfg.AppConfig != nil && cfg.AppConfig.CustomReviewScript != "" {
4✔
186
                customExec = &executor.CustomExecutor{
1✔
187
                        Script: cfg.AppConfig.CustomReviewScript,
1✔
188
                        OutputHandler: func(text string) {
1✔
189
                                log.PrintAligned(text)
×
190
                        },
×
191
                        ErrorPatterns: cfg.AppConfig.CodexErrorPatterns, // reuse codex error patterns
192
                        LimitPatterns: cfg.AppConfig.CodexLimitPatterns, // reuse codex limit patterns
193
                }
194
        }
195

196
        // auto-disable codex if the binary is not installed AND we need codex
197
        // (skip this check if using custom external review tool or external review is disabled)
198
        if cfg.CodexEnabled && needsCodexBinary(cfg.AppConfig) {
4✔
199
                codexCmd := codexExec.Command
1✔
200
                if codexCmd == "" {
1✔
201
                        codexCmd = "codex"
×
202
                }
×
203
                if _, err := exec.LookPath(codexCmd); err != nil {
2✔
204
                        log.Print("warning: codex not found (%s: %v), disabling codex review phase", codexCmd, err)
1✔
205
                        cfg.CodexEnabled = false
1✔
206
                }
1✔
207
        }
208

209
        return NewWithExecutors(cfg, log, Executors{Claude: claudeExec, ReviewClaude: reviewExec, Codex: codexExec, Custom: customExec}, holder)
3✔
210
}
211

212
// NewWithExecutors creates a new Runner with custom executors (for testing).
213
func NewWithExecutors(cfg Config, log Logger, execs Executors, holder *status.PhaseHolder) *Runner {
132✔
214
        // determine iteration delay from config or default
132✔
215
        iterDelay := DefaultIterationDelay
132✔
216
        if cfg.IterationDelayMs > 0 {
168✔
217
                iterDelay = time.Duration(cfg.IterationDelayMs) * time.Millisecond
36✔
218
        }
36✔
219

220
        // determine task retry count from config
221
        // appConfig.TaskRetryCountSet means user explicitly set it (even to 0 for no retries)
222
        retryCount := 1
132✔
223
        if cfg.AppConfig != nil && cfg.AppConfig.TaskRetryCountSet {
237✔
224
                retryCount = cfg.TaskRetryCount
105✔
225
        } else if cfg.TaskRetryCount > 0 {
133✔
226
                retryCount = cfg.TaskRetryCount
1✔
227
        }
1✔
228

229
        // determine wait-on-limit duration from config
230
        var waitOnLimit time.Duration
132✔
231
        if cfg.AppConfig != nil {
237✔
232
                waitOnLimit = cfg.AppConfig.WaitOnLimit
105✔
233
        }
105✔
234

235
        // if no separate review executor, use the same as task executor
236
        reviewClaude := execs.ReviewClaude
132✔
237
        if reviewClaude == nil {
264✔
238
                reviewClaude = execs.Claude
132✔
239
        }
132✔
240

241
        return &Runner{
132✔
242
                cfg:            cfg,
132✔
243
                log:            log,
132✔
244
                claude:         execs.Claude,
132✔
245
                reviewClaude:   reviewClaude,
132✔
246
                codex:          execs.Codex,
132✔
247
                custom:         execs.Custom,
132✔
248
                phaseHolder:    holder,
132✔
249
                iterationDelay: iterDelay,
132✔
250
                taskRetryCount: retryCount,
132✔
251
                waitOnLimit:    waitOnLimit,
132✔
252
        }
132✔
253
}
254

255
// SetInputCollector sets the input collector for plan creation mode.
256
func (r *Runner) SetInputCollector(c InputCollector) {
17✔
257
        r.inputCollector = c
17✔
258
}
17✔
259

260
// SetGitChecker sets the git checker for no-commit detection in review loops.
261
func (r *Runner) SetGitChecker(g GitChecker) {
8✔
262
        r.git = g
8✔
263
}
8✔
264

265
// SetBreakCh sets the break channel for manual termination of review and task loops.
266
// each value sent on the channel triggers one break event (repeatable, not close-based).
267
func (r *Runner) SetBreakCh(ch <-chan struct{}) {
7✔
268
        r.breakCh = ch
7✔
269
}
7✔
270

271
// SetPauseHandler sets the callback invoked when a break signal is received during task iteration.
272
// the handler should prompt the user and return true to resume or false to abort.
273
// if nil, break during task phase returns ErrUserAborted immediately.
274
func (r *Runner) SetPauseHandler(fn func(ctx context.Context) bool) {
3✔
275
        r.pauseHandler = fn
3✔
276
}
3✔
277

278
// Run executes the main loop based on configured mode.
279
func (r *Runner) Run(ctx context.Context) error {
88✔
280
        switch r.cfg.Mode {
88✔
281
        case ModeFull:
16✔
282
                return r.runFull(ctx)
16✔
283
        case ModeReview:
16✔
284
                return r.runReviewOnly(ctx)
16✔
285
        case ModeCodexOnly:
26✔
286
                return r.runCodexOnly(ctx)
26✔
287
        case ModeTasksOnly:
11✔
288
                return r.runTasksOnly(ctx)
11✔
289
        case ModePlan:
18✔
290
                return r.runPlanCreation(ctx)
18✔
291
        default:
1✔
292
                return fmt.Errorf("unknown mode: %s", r.cfg.Mode)
1✔
293
        }
294
}
295

296
// runFull executes the complete pipeline: tasks → review → codex → review.
297
func (r *Runner) runFull(ctx context.Context) error {
16✔
298
        if r.cfg.PlanFile == "" {
17✔
299
                return errors.New("plan file required for full mode")
1✔
300
        }
1✔
301

302
        // phase 1: task execution
303
        r.phaseHolder.Set(status.PhaseTask)
15✔
304
        r.log.PrintRaw("starting task execution phase\n")
15✔
305

15✔
306
        if err := r.runTaskPhase(ctx); err != nil {
24✔
307
                if errors.Is(err, ErrUserAborted) {
10✔
308
                        r.log.Print("task phase aborted by user")
1✔
309
                        return ErrUserAborted
1✔
310
                }
1✔
311
                return fmt.Errorf("task phase: %w", err)
8✔
312
        }
313

314
        // phase 2: first review pass - address ALL findings
315
        r.phaseHolder.Set(status.PhaseReview)
6✔
316
        r.log.PrintSection(status.NewGenericSection("claude review 0: all findings"))
6✔
317

6✔
318
        if err := r.runClaudeReview(ctx, r.replacePromptVariables(r.cfg.AppConfig.ReviewFirstPrompt)); err != nil {
6✔
319
                return fmt.Errorf("first review: %w", err)
×
320
        }
×
321

322
        // phase 2.1: claude review loop (critical/major) before codex
323
        if err := r.runClaudeReviewLoop(ctx); err != nil {
6✔
324
                return fmt.Errorf("pre-codex review loop: %w", err)
×
325
        }
×
326

327
        // phase 2.5+3: codex → post-codex review → finalize
328
        if err := r.runCodexAndPostReview(ctx); err != nil {
6✔
329
                return err
×
330
        }
×
331

332
        r.log.Print("all phases completed successfully")
6✔
333
        return nil
6✔
334
}
335

336
// runReviewOnly executes only the review pipeline: review → codex → review.
337
func (r *Runner) runReviewOnly(ctx context.Context) error {
16✔
338
        // phase 1: first review
16✔
339
        r.phaseHolder.Set(status.PhaseReview)
16✔
340
        r.log.PrintSection(status.NewGenericSection("claude review 0: all findings"))
16✔
341

16✔
342
        if err := r.runClaudeReview(ctx, r.replacePromptVariables(r.cfg.AppConfig.ReviewFirstPrompt)); err != nil {
17✔
343
                return fmt.Errorf("first review: %w", err)
1✔
344
        }
1✔
345

346
        // phase 1.1: claude review loop (critical/major) before codex
347
        if err := r.runClaudeReviewLoop(ctx); err != nil {
16✔
348
                return fmt.Errorf("pre-codex review loop: %w", err)
1✔
349
        }
1✔
350

351
        // phase 2+3: codex → post-codex review → finalize
352
        if err := r.runCodexAndPostReview(ctx); err != nil {
17✔
353
                return err
3✔
354
        }
3✔
355

356
        r.log.Print("review phases completed successfully")
11✔
357
        return nil
11✔
358
}
359

360
// runCodexOnly executes only the codex pipeline: codex → review → finalize.
361
func (r *Runner) runCodexOnly(ctx context.Context) error {
26✔
362
        if err := r.runCodexAndPostReview(ctx); err != nil {
27✔
363
                return err
1✔
364
        }
1✔
365

366
        r.log.Print("codex phases completed successfully")
25✔
367
        return nil
25✔
368
}
369

370
// runCodexAndPostReview runs the shared codex → post-codex claude review → finalize pipeline.
371
// used by runFull, runReviewOnly, and runCodexOnly to avoid duplicating this sequence.
372
func (r *Runner) runCodexAndPostReview(ctx context.Context) error {
46✔
373
        // codex external review loop
46✔
374
        r.phaseHolder.Set(status.PhaseCodex)
46✔
375
        r.log.PrintSection(status.NewGenericSection("codex external review"))
46✔
376

46✔
377
        if err := r.runCodexLoop(ctx); err != nil {
49✔
378
                return fmt.Errorf("codex loop: %w", err)
3✔
379
        }
3✔
380

381
        // claude review loop (critical/major) after codex.
382
        // prepend commit-pending instruction only when external review actually ran,
383
        // because the loop may exit early (max iterations, stalemate, manual break)
384
        // leaving uncommitted fixes in the worktree.
385
        r.phaseHolder.Set(status.PhaseReview)
43✔
386

43✔
387
        var commitPrefix string
43✔
388
        if r.externalReviewTool() != "none" {
65✔
389
                commitPrefix = "IMPORTANT: Before starting the review, run `git status`. " +
22✔
390
                        "If there are uncommitted changes from previous review phases, " +
22✔
391
                        "stage and commit them with message: " +
22✔
392
                        "`fix: address code review findings`\n" +
22✔
393
                        "Then continue with the sequence below.\n\n"
22✔
394
        }
22✔
395
        if err := r.runClaudeReviewLoop(ctx, commitPrefix); err != nil {
43✔
396
                return fmt.Errorf("post-codex review loop: %w", err)
×
397
        }
×
398

399
        // optional finalize step (best-effort, but propagates context cancellation)
400
        return r.runFinalize(ctx)
43✔
401
}
402

403
// runTasksOnly executes only task phase, skipping all reviews.
404
func (r *Runner) runTasksOnly(ctx context.Context) error {
11✔
405
        if r.cfg.PlanFile == "" {
12✔
406
                return errors.New("plan file required for tasks-only mode")
1✔
407
        }
1✔
408

409
        r.phaseHolder.Set(status.PhaseTask)
10✔
410
        r.log.PrintRaw("starting task execution phase\n")
10✔
411

10✔
412
        if err := r.runTaskPhase(ctx); err != nil {
14✔
413
                if errors.Is(err, ErrUserAborted) {
7✔
414
                        r.log.Print("task phase aborted by user")
3✔
415
                        return ErrUserAborted
3✔
416
                }
3✔
417
                return fmt.Errorf("task phase: %w", err)
1✔
418
        }
419

420
        r.log.Print("task execution completed successfully")
6✔
421
        return nil
6✔
422
}
423

424
// runTaskPhase executes tasks until completion or max iterations.
425
// executes ONE Task section per iteration. supports break (Ctrl+\) with pause+resume:
426
// on break, the current session is canceled, pauseHandler is called, and on resume
427
// the same iteration re-runs with a fresh session that re-reads the plan file.
428
func (r *Runner) runTaskPhase(ctx context.Context) error {
25✔
429
        if r.taskPhaseOverride != nil {
27✔
430
                return r.taskPhaseOverride(ctx)
2✔
431
        }
2✔
432
        prompt := r.replacePromptVariables(r.cfg.AppConfig.TaskPrompt)
23✔
433
        retryCount := 0
23✔
434

23✔
435
        for i := 1; i <= r.cfg.MaxIterations; i++ {
53✔
436
                select {
30✔
437
                case <-ctx.Done():
1✔
438
                        return fmt.Errorf("task phase: %w", ctx.Err())
1✔
439
                default:
29✔
440
                }
441

442
                // use plan task position instead of loop counter for correct dashboard highlighting
443
                taskNum := i
29✔
444
                if pos := r.nextPlanTaskPosition(); pos > 0 {
32✔
445
                        taskNum = pos
3✔
446
                }
3✔
447
                r.log.PrintSection(status.NewTaskIterationSection(taskNum))
29✔
448

29✔
449
                // create per-iteration break context so Ctrl+\ cancels only the current session
29✔
450
                loopCtx, loopCancel := r.breakContext(ctx)
29✔
451

29✔
452
                result := r.runWithLimitRetry(loopCtx, r.claude.Run, prompt, "claude")
29✔
453

29✔
454
                // check break before calling loopCancel — cancel would make loopCtx.Err() non-nil
29✔
455
                manualBreak := r.isBreak(loopCtx, ctx)
29✔
456
                loopCancel()
29✔
457

29✔
458
                if manualBreak {
33✔
459
                        r.log.Print("session interrupted by break signal")
4✔
460
                        r.drainBreakCh() // clear signal that may have arrived during cancellation
4✔
461
                        if r.pauseHandler == nil || !r.pauseHandler(ctx) {
6✔
462
                                return ErrUserAborted
2✔
463
                        }
2✔
464
                        // resume: decrement i to preserve iteration budget and re-run same task
465
                        r.drainBreakCh() // clear any signal received during pause prompt
2✔
466
                        i--
2✔
467
                        retryCount = 0
2✔
468
                        continue
2✔
469
                }
470

471
                if result.Error != nil {
28✔
472
                        if err := r.handlePatternMatchError(result.Error, "claude"); err != nil {
5✔
473
                                return err
2✔
474
                        }
2✔
475
                        return fmt.Errorf("claude execution: %w", result.Error)
1✔
476
                }
477

478
                if result.Signal == SignalCompleted {
34✔
479
                        // verify plan actually has no uncompleted checkboxes
12✔
480
                        if r.hasUncompletedTasks() {
12✔
481
                                r.log.Print("warning: completion signal received but plan still has [ ] items, continuing...")
×
482
                                continue
×
483
                        }
484
                        r.log.PrintRaw("\nall tasks completed, starting code review...\n")
12✔
485
                        return nil
12✔
486
                }
487

488
                if result.Signal == SignalFailed {
15✔
489
                        if retryCount < r.taskRetryCount {
7✔
490
                                r.log.Print("task failed, retrying...")
2✔
491
                                retryCount++
2✔
492
                                if err := r.sleepWithContext(ctx, r.iterationDelay); err != nil {
2✔
493
                                        return fmt.Errorf("interrupted: %w", err)
×
494
                                }
×
495
                                continue
2✔
496
                        }
497
                        return errors.New("task execution failed after retry (FAILED signal received)")
3✔
498
                }
499

500
                retryCount = 0
5✔
501
                // continue with same prompt - it reads from plan file each time
5✔
502
                if err := r.sleepWithContext(ctx, r.iterationDelay); err != nil {
6✔
503
                        return fmt.Errorf("interrupted: %w", err)
1✔
504
                }
1✔
505
        }
506

507
        return fmt.Errorf("max iterations (%d) reached without completion", r.cfg.MaxIterations)
1✔
508
}
509

510
// runClaudeReview runs Claude review with the given prompt until REVIEW_DONE.
511
func (r *Runner) runClaudeReview(ctx context.Context, prompt string) error {
22✔
512
        result := r.runWithLimitRetry(ctx, r.reviewClaude.Run, prompt, "claude")
22✔
513
        if result.Error != nil {
22✔
514
                if err := r.handlePatternMatchError(result.Error, "claude"); err != nil {
×
515
                        return err
×
516
                }
×
517
                return fmt.Errorf("claude execution: %w", result.Error)
×
518
        }
519

520
        if result.Signal == SignalFailed {
23✔
521
                return errors.New("review failed (FAILED signal received)")
1✔
522
        }
1✔
523

524
        if !isReviewDone(result.Signal) {
22✔
525
                r.log.Print("warning: first review pass did not complete cleanly, continuing...")
1✔
526
        }
1✔
527

528
        return nil
21✔
529
}
530

531
// runClaudeReviewLoop runs claude review iterations using second review prompt.
532
// optional promptPrefix is prepended to the review prompt (used for commit-pending instruction after codex).
533
func (r *Runner) runClaudeReviewLoop(ctx context.Context, promptPrefix ...string) error {
64✔
534
        // review iterations = 10% of max_iterations
64✔
535
        maxReviewIterations := max(minReviewIterations, r.cfg.MaxIterations/reviewIterationDivisor)
64✔
536

64✔
537
        prefix := ""
64✔
538
        if len(promptPrefix) > 0 {
107✔
539
                prefix = promptPrefix[0]
43✔
540
        }
43✔
541

542
        for i := 1; i <= maxReviewIterations; i++ {
135✔
543
                select {
71✔
544
                case <-ctx.Done():
×
545
                        return fmt.Errorf("review: %w", ctx.Err())
×
546
                default:
71✔
547
                }
548

549
                r.log.PrintSection(status.NewClaudeReviewSection(i, ": critical/major"))
71✔
550

71✔
551
                // capture HEAD hash before running claude for no-commit detection
71✔
552
                headBefore := r.headHash()
71✔
553

71✔
554
                result := r.runWithLimitRetry(ctx, r.reviewClaude.Run,
71✔
555
                        prefix+r.replacePromptVariables(r.cfg.AppConfig.ReviewSecondPrompt), "claude")
71✔
556
                if result.Error != nil {
72✔
557
                        if err := r.handlePatternMatchError(result.Error, "claude"); err != nil {
2✔
558
                                return err
1✔
559
                        }
1✔
560
                        return fmt.Errorf("claude execution: %w", result.Error)
×
561
                }
562

563
                if result.Signal == SignalFailed {
70✔
564
                        return errors.New("review failed (FAILED signal received)")
×
565
                }
×
566

567
                if isReviewDone(result.Signal) {
130✔
568
                        r.log.Print("claude review complete - no more findings")
60✔
569
                        return nil
60✔
570
                }
60✔
571

572
                // on session timeout, skip HEAD check and retry; the session was killed before
573
                // it could finish, so "no changes" doesn't mean "nothing to fix"
574
                if r.lastSessionTimedOut {
12✔
575
                        r.log.Print("session timed out, retrying review iteration...")
2✔
576
                        continue
2✔
577
                }
578

579
                // fallback: if HEAD hash hasn't changed, claude found nothing to fix
580
                if headBefore != "" {
10✔
581
                        if headAfter := r.headHash(); headAfter == headBefore {
3✔
582
                                r.log.Print("claude review complete - no changes detected")
1✔
583
                                return nil
1✔
584
                        }
1✔
585
                }
586

587
                r.log.Print("issues fixed, running another review iteration...")
7✔
588
                if err := r.sleepWithContext(ctx, r.iterationDelay); err != nil {
7✔
589
                        return fmt.Errorf("interrupted: %w", err)
×
590
                }
×
591
        }
592

593
        r.log.Print("max claude review iterations reached, continuing...")
2✔
594
        return nil
2✔
595
}
596

597
// headHash returns the current HEAD commit hash, or empty string if unavailable.
598
func (r *Runner) headHash() string {
93✔
599
        if r.git == nil {
153✔
600
                return ""
60✔
601
        }
60✔
602
        hash, err := r.git.HeadHash()
33✔
603
        if err != nil {
37✔
604
                r.log.Print("warning: failed to get HEAD hash: %v", err)
4✔
605
                return ""
4✔
606
        }
4✔
607
        return hash
29✔
608
}
609

610
// diffFingerprint returns a hash of the current working tree diff, or empty string if unavailable.
611
func (r *Runner) diffFingerprint() string {
20✔
612
        if r.git == nil {
23✔
613
                return ""
3✔
614
        }
3✔
615
        fp, err := r.git.DiffFingerprint()
17✔
616
        if err != nil {
17✔
617
                r.log.Print("warning: failed to get diff fingerprint: %v", err)
×
618
                return ""
×
619
        }
×
620
        return fp
17✔
621
}
622

623
// checkStalemate compares git state before and after claude evaluation to detect unchanged rounds.
624
// returns the updated unchanged round counter: incremented if no changes detected, reset to 0 otherwise.
625
// when diff fingerprints are unavailable (error), falls back to HEAD-only comparison.
626
func (r *Runner) checkStalemate(headBefore, headAfter, diffBefore, diffAfter string, unchangedRounds int) int {
6✔
627
        unchanged := headAfter == headBefore
6✔
628
        if diffBefore != "" && diffAfter != "" {
12✔
629
                unchanged = unchanged && diffAfter == diffBefore
6✔
630
        }
6✔
631
        if unchanged {
10✔
632
                return unchangedRounds + 1
4✔
633
        }
4✔
634
        return 0
2✔
635
}
636

637
// updateStalemate checks if review patience is enabled, computes the "after" git state,
638
// and returns the updated unchanged-rounds counter plus a flag indicating stalemate.
639
// skips the update if "after" values are empty (transient git error) to avoid resetting the counter.
640
func (r *Runner) updateStalemate(headBefore, diffBefore string, unchangedRounds int) (int, bool) {
17✔
641
        if r.cfg.ReviewPatience <= 0 || headBefore == "" {
28✔
642
                return unchangedRounds, false
11✔
643
        }
11✔
644
        // skip stalemate update if "after" values are empty (transient git error),
645
        // so errors don't reset unchangedRounds and inadvertently disable early exit
646
        if headAfter, diffAfter := r.headHash(), r.diffFingerprint(); headAfter != "" && diffAfter != "" {
12✔
647
                unchangedRounds = r.checkStalemate(headBefore, headAfter, diffBefore, diffAfter, unchangedRounds)
6✔
648
        }
6✔
649
        if unchangedRounds >= r.cfg.ReviewPatience {
7✔
650
                r.log.Print("stalemate detected after %d unchanged rounds, external review terminated early", unchangedRounds)
1✔
651
                return unchangedRounds, true
1✔
652
        }
1✔
653
        return unchangedRounds, false
5✔
654
}
655

656
// externalReviewTool returns the effective external review tool to use.
657
// handles backward compatibility: codex_enabled = false → "none"
658
// the CodexEnabled flag takes precedence for backward compatibility.
659
func (r *Runner) externalReviewTool() string {
89✔
660
        // backward compatibility: codex_enabled = false means no external review
89✔
661
        // this takes precedence over external_review_tool setting
89✔
662
        if !r.cfg.CodexEnabled {
129✔
663
                return "none"
40✔
664
        }
40✔
665

666
        // check explicit external_review_tool setting
667
        if r.cfg.AppConfig != nil && r.cfg.AppConfig.ExternalReviewTool != "" {
98✔
668
                return r.cfg.AppConfig.ExternalReviewTool
49✔
669
        }
49✔
670

671
        // default to codex
672
        return "codex"
×
673
}
674

675
// runCodexLoop runs the external review loop (codex or custom) until no findings.
676
func (r *Runner) runCodexLoop(ctx context.Context) error {
46✔
677
        tool := r.externalReviewTool()
46✔
678

46✔
679
        // skip external review phase if disabled
46✔
680
        if tool == "none" {
67✔
681
                r.log.Print("external review disabled, skipping...")
21✔
682
                return nil
21✔
683
        }
21✔
684

685
        // custom review tool
686
        if tool == "custom" {
28✔
687
                if r.custom == nil {
4✔
688
                        return errors.New("custom review script not configured")
1✔
689
                }
1✔
690
                return r.runExternalReviewLoop(ctx, externalReviewConfig{
2✔
691
                        name:            "custom",
2✔
692
                        runReview:       func(ctx context.Context, prompt string) executor.Result { return r.custom.Run(ctx, prompt) },
4✔
693
                        buildPrompt:     r.buildCustomReviewPrompt,
694
                        buildEvalPrompt: r.buildCustomEvaluationPrompt,
695
                        showSummary:     func(string) {}, // no-op: custom output already streamed via OutputHandler
2✔
696
                        makeSection:     status.NewCustomIterationSection,
697
                })
698
        }
699

700
        // default: codex review
701
        return r.runExternalReviewLoop(ctx, externalReviewConfig{
22✔
702
                name:            "codex",
22✔
703
                runReview:       r.codex.Run,
22✔
704
                buildPrompt:     r.buildCodexPrompt,
22✔
705
                buildEvalPrompt: r.buildCodexEvaluationPrompt,
22✔
706
                showSummary:     r.showCodexSummary,
22✔
707
                makeSection:     status.NewCodexIterationSection,
22✔
708
        })
22✔
709
}
710

711
// externalReviewConfig holds callbacks for running an external review tool.
712
type externalReviewConfig struct {
713
        name            string                                                   // tool name for error messages
714
        runReview       func(ctx context.Context, prompt string) executor.Result // run the external review tool
715
        buildPrompt     func(isFirst bool, claudeResponse string) string         // build prompt for review tool
716
        buildEvalPrompt func(output string) string                               // build evaluation prompt for claude
717
        showSummary     func(output string)                                      // display review findings summary
718
        makeSection     func(iteration int) status.Section                       // create section header
719
}
720

721
// runExternalReviewLoop runs a generic external review tool-claude loop.
722
// it terminates when no findings remain, max iterations are reached,
723
// stalemate is detected (review patience), or a manual break is requested.
724
func (r *Runner) runExternalReviewLoop(ctx context.Context, cfg externalReviewConfig) error {
24✔
725
        maxIterations := max(minCodexIterations, r.cfg.MaxIterations/codexIterationDivisor)
24✔
726
        if r.cfg.MaxExternalIterations > 0 {
29✔
727
                maxIterations = r.cfg.MaxExternalIterations
5✔
728
        }
5✔
729

730
        // derive a child context that cancels when break channel fires
731
        loopCtx, loopCancel := r.breakContext(ctx)
24✔
732
        defer loopCancel()
24✔
733

24✔
734
        var claudeResponse string // first iteration has no prior response
24✔
735
        var unchangedRounds int   // consecutive iterations with no commits (for stalemate detection)
24✔
736
        firstCompleted := false   // tracks if any successful eval completed; controls diff scope for external tool
24✔
737

24✔
738
        for i := 1; i <= maxIterations; i++ {
63✔
739
                select {
39✔
740
                case <-loopCtx.Done():
×
741
                        if r.isBreak(loopCtx, ctx) {
×
742
                                r.log.Print("manual break requested, external review terminated early")
×
743
                                return nil
×
744
                        }
×
745
                        return fmt.Errorf("%s loop: %w", cfg.name, ctx.Err())
×
746
                default:
39✔
747
                }
748

749
                r.log.PrintSection(cfg.makeSection(i))
39✔
750

39✔
751
                // run external review tool. use branch-wide diff until a successful claude eval completes,
39✔
752
                // so that a timeout on the first eval doesn't narrow subsequent reviews to working-tree only
39✔
753
                reviewResult := r.runWithLimitRetry(loopCtx, cfg.runReview, cfg.buildPrompt(!firstCompleted, claudeResponse), cfg.name)
39✔
754
                if reviewResult.Error != nil {
42✔
755
                        if r.isBreak(loopCtx, ctx) {
4✔
756
                                r.log.Print("manual break requested, external review terminated early")
1✔
757
                                return nil
1✔
758
                        }
1✔
759
                        if err := r.handlePatternMatchError(reviewResult.Error, cfg.name); err != nil {
3✔
760
                                return err
1✔
761
                        }
1✔
762
                        return fmt.Errorf("%s execution: %w", cfg.name, reviewResult.Error)
1✔
763
                }
764

765
                if reviewResult.Output == "" {
38✔
766
                        r.log.Print("%s review returned no output, skipping...", cfg.name)
2✔
767
                        break
2✔
768
                }
769

770
                // show findings summary before Claude evaluation
771
                cfg.showSummary(reviewResult.Output)
34✔
772

34✔
773
                // capture state before claude evaluation for stalemate detection (only when enabled)
34✔
774
                var headBefore, diffBefore string
34✔
775
                if r.cfg.ReviewPatience > 0 {
48✔
776
                        headBefore = r.headHash()
14✔
777
                        diffBefore = r.diffFingerprint()
14✔
778
                }
14✔
779

780
                // pass output to claude for evaluation and fixing
781
                r.phaseHolder.Set(status.PhaseClaudeEval)
34✔
782
                r.log.PrintSection(status.NewClaudeEvalSection())
34✔
783
                claudeResult := r.runWithLimitRetry(loopCtx, r.reviewClaude.Run, cfg.buildEvalPrompt(reviewResult.Output), "claude")
34✔
784

34✔
785
                // restore codex phase for next iteration
34✔
786
                r.phaseHolder.Set(status.PhaseCodex)
34✔
787
                if claudeResult.Error != nil {
34✔
788
                        if r.isBreak(loopCtx, ctx) {
×
789
                                r.log.Print("manual break requested, external review terminated early")
×
790
                                return nil
×
791
                        }
×
792
                        if err := r.handlePatternMatchError(claudeResult.Error, "claude"); err != nil {
×
793
                                return err
×
794
                        }
×
795
                        return fmt.Errorf("claude execution: %w", claudeResult.Error)
×
796
                }
797

798
                // on session timeout, skip response capture and stalemate detection; the session was killed
799
                // before it could finish, so partial output can't be trusted as previous context and
800
                // "no changes" doesn't mean "nothing to fix"
801
                if r.lastSessionTimedOut {
37✔
802
                        r.log.Print("claude eval session timed out, retrying %s iteration...", cfg.name)
3✔
803
                        continue
3✔
804
                }
805

806
                firstCompleted = true // successful eval completed, next iteration can use working-tree diff
31✔
807
                claudeResponse = claudeResult.Output
31✔
808

31✔
809
                // exit only when claude sees "no findings"
31✔
810
                if isCodexDone(claudeResult.Signal) {
45✔
811
                        r.log.Print("%s review complete - no more findings", cfg.name)
14✔
812
                        return nil
14✔
813
                }
14✔
814

815
                // stalemate detection: track consecutive rounds with no changes (commits or working tree edits).
816
                // the eval prompt tells claude not to commit during fix rounds, so HEAD alone can't distinguish
817
                // "rejected findings" from "made fixes without commit". checking the diff fingerprint catches
818
                // working tree edits, making the detection accurate for both cases.
819
                var stalemate bool
17✔
820
                unchangedRounds, stalemate = r.updateStalemate(headBefore, diffBefore, unchangedRounds)
17✔
821
                if stalemate {
18✔
822
                        return nil
1✔
823
                }
1✔
824

825
                if err := r.sleepWithContext(loopCtx, r.iterationDelay); err != nil {
16✔
826
                        if r.isBreak(loopCtx, ctx) {
×
827
                                r.log.Print("manual break requested, external review terminated early")
×
828
                                return nil
×
829
                        }
×
830
                        return fmt.Errorf("interrupted: %w", err)
×
831
                }
832
        }
833

834
        r.log.Print("max %s iterations reached, continuing to next phase...", cfg.name)
6✔
835
        return nil
6✔
836
}
837

838
// breakContext derives a child context that cancels when one value is drained from the break channel.
839
// if no break channel is configured, returns the parent context and a no-op cancel.
840
// callers detect break by checking loopCtx.Err() != nil && parentCtx.Err() == nil.
841
func (r *Runner) breakContext(parent context.Context) (context.Context, context.CancelFunc) {
53✔
842
        if r.breakCh == nil {
99✔
843
                return parent, func() {}
92✔
844
        }
845
        ctx, cancel := context.WithCancel(parent)
7✔
846
        go func() {
14✔
847
                select {
7✔
848
                case <-r.breakCh:
5✔
849
                        cancel()
5✔
850
                case <-ctx.Done():
2✔
851
                }
852
        }()
853
        return ctx, cancel
7✔
854
}
855

856
// isBreak returns true if the loop context was canceled by a break signal
857
// while the parent context is still alive. does not read from the break channel,
858
// so it can be called without consuming a pending signal.
859
func (r *Runner) isBreak(loopCtx, parentCtx context.Context) bool {
32✔
860
        return loopCtx.Err() != nil && parentCtx.Err() == nil
32✔
861
}
32✔
862

863
// drainBreakCh does a non-blocking drain of one pending value from the break channel.
864
// called after pause+resume to prevent a SIGQUIT received during the pause prompt
865
// from immediately canceling the next iteration. not called on normal iteration
866
// boundaries so that a legitimate Ctrl+\ between iterations is preserved.
867
func (r *Runner) drainBreakCh() {
9✔
868
        if r.breakCh == nil {
10✔
869
                return
1✔
870
        }
1✔
871
        select {
8✔
872
        case <-r.breakCh:
2✔
873
        default:
6✔
874
        }
875
}
876

877
// buildCodexPrompt creates the prompt for codex review.
878
// uses the codex_review prompt loaded from config with all variables expanded,
879
// including {{PREVIOUS_REVIEW_CONTEXT}} for iteration context.
880
func (r *Runner) buildCodexPrompt(isFirst bool, claudeResponse string) string {
45✔
881
        return r.replaceVariablesWithIteration(r.cfg.AppConfig.CodexReviewPrompt, isFirst, claudeResponse)
45✔
882
}
45✔
883

884
// hasUncompletedTasks checks if any Task section has uncompleted checkboxes.
885
// only Task sections (### Task N: or ### Iteration N:) are considered.
886
// checkboxes in Success criteria, Overview, or Context are ignored for this check,
887
// so the agent can output ALL_TASKS_DONE when those are verification-only.
888
// for malformed plans (checkboxes without task headers), returns true if any [ ] exists.
889
func (r *Runner) hasUncompletedTasks() bool {
22✔
890
        path := r.resolvePlanFilePath()
22✔
891
        if path == "" {
22✔
892
                return false // no plan file, nothing to complete
×
893
        }
×
894
        p, err := plan.ParsePlanFile(path)
22✔
895
        if err != nil {
22✔
896
                r.log.Print("[WARN] failed to parse plan file for completion check: %v", err)
×
897
                return true // assume incomplete if can't read
×
898
        }
×
899
        for _, t := range p.Tasks {
37✔
900
                if t.HasUncompletedActionableWork() {
19✔
901
                        return true
4✔
902
                }
4✔
903
        }
904
        // malformed plans: no task headers but file has [ ] — treat as incomplete
905
        if len(p.Tasks) == 0 {
29✔
906
                has, err := plan.FileHasUncompletedCheckbox(path)
11✔
907
                if err != nil {
11✔
908
                        return true
×
909
                }
×
910
                if has {
12✔
911
                        return true
1✔
912
                }
1✔
913
        }
914
        return false
17✔
915
}
916

917
// nextPlanTaskPosition returns the 1-indexed position of the first uncompleted task in the plan.
918
// returns 0 if the plan file can't be read/parsed or no uncompleted tasks exist (caller falls back to loop counter).
919
func (r *Runner) nextPlanTaskPosition() int {
39✔
920
        p, err := plan.ParsePlanFile(r.resolvePlanFilePath())
39✔
921
        if err != nil {
41✔
922
                r.log.Print("[WARN] failed to parse plan file for task position: %v", err)
2✔
923
                return 0
2✔
924
        }
2✔
925
        for i, t := range p.Tasks {
60✔
926
                if t.HasUncompletedActionableWork() {
32✔
927
                        return i + 1 // 1-indexed
9✔
928
                }
9✔
929
        }
930
        return 0
28✔
931
}
932

933
// showCodexSummary displays a condensed summary of codex output before Claude evaluation.
934
// extracts text until first code block or maxCodexSummaryLen chars, whichever is shorter.
935
func (r *Runner) showCodexSummary(output string) {
32✔
936
        r.showExternalReviewSummary("codex", output)
32✔
937
}
32✔
938

939
// showExternalReviewSummary displays a condensed summary of external review output.
940
// extracts text until first code block or 5000 chars, whichever is shorter.
941
func (r *Runner) showExternalReviewSummary(toolName, output string) {
32✔
942
        summary := output
32✔
943

32✔
944
        // trim to first code block if present
32✔
945
        if idx := strings.Index(summary, "```"); idx > 0 {
32✔
946
                summary = summary[:idx]
×
947
        }
×
948

949
        // limit to maxCodexSummaryLen runes to avoid splitting multi-byte characters
950
        if runes := []rune(summary); len(runes) > maxCodexSummaryLen {
32✔
951
                summary = string(runes[:maxCodexSummaryLen]) + "..."
×
952
        }
×
953

954
        summary = strings.TrimSpace(summary)
32✔
955
        if summary == "" {
32✔
956
                return
×
957
        }
×
958

959
        r.log.Print("%s findings:", toolName)
32✔
960
        for line := range strings.SplitSeq(summary, "\n") {
64✔
961
                if strings.TrimSpace(line) == "" {
32✔
962
                        continue
×
963
                }
964
                r.log.PrintAligned("  " + line)
32✔
965
        }
966
}
967

968
// Sentinel errors for user-initiated exits.
var (
	// ErrUserAborted is returned when the user aborts or declines to resume after a break signal (Ctrl+\).
	// It is propagated as a non-nil error so that callers (including mode entrypoints) can detect it
	// and treat it as a clean user-initiated exit, avoiding further review/finalize steps.
	ErrUserAborted = errors.New("user aborted")

	// ErrUserRejectedPlan is returned when the user rejects the plan draft.
	ErrUserRejectedPlan = errors.New("user rejected plan")
)
975

976
// draftReviewResult is the outcome of handling a plan draft review.
type draftReviewResult struct {
	// handled is true if a draft was found and handled
	handled bool
	// feedback is the revision feedback (non-empty only for "revise" action)
	feedback string
	// err is set if the review failed or the user rejected the draft
	err error
}
982

983
// handlePlanDraft processes PLAN_DRAFT signal if present in output.
984
// returns result indicating whether draft was handled and any feedback/errors.
985
func (r *Runner) handlePlanDraft(ctx context.Context, output string) draftReviewResult {
16✔
986
        planContent, draftErr := parsePlanDraftPayload(output)
16✔
987
        if draftErr != nil {
25✔
988
                // log malformed signals (but not "no signal" which is expected)
9✔
989
                if !errors.Is(draftErr, errNoPlanDraftSignal) {
10✔
990
                        r.log.Print("warning: %v", draftErr)
1✔
991
                }
1✔
992
                return draftReviewResult{handled: false}
9✔
993
        }
994

995
        r.log.Print("plan draft ready for review")
7✔
996

7✔
997
        action, feedback, askErr := r.inputCollector.AskDraftReview(ctx, "Review the plan draft", planContent)
7✔
998
        if askErr != nil {
8✔
999
                return draftReviewResult{handled: true, err: fmt.Errorf("collect draft review: %w", askErr)}
1✔
1000
        }
1✔
1001

1002
        // log the draft review action and feedback to progress file
1003
        r.log.LogDraftReview(action, feedback)
6✔
1004

6✔
1005
        switch action {
6✔
1006
        case "accept":
3✔
1007
                r.log.Print("draft accepted, continuing to write plan file...")
3✔
1008
                return draftReviewResult{handled: true}
3✔
1009
        case "revise":
2✔
1010
                r.log.Print("revision requested, re-running with feedback...")
2✔
1011
                return draftReviewResult{handled: true, feedback: feedback}
2✔
1012
        case "reject":
1✔
1013
                r.log.Print("plan rejected by user")
1✔
1014
                return draftReviewResult{handled: true, err: ErrUserRejectedPlan}
1✔
1015
        }
1016

1017
        return draftReviewResult{handled: true}
×
1018
}
1019

1020
// handlePlanQuestion processes QUESTION signal if present in output.
1021
// returns true if question was found and handled, false otherwise.
1022
// returns error if question handling failed.
1023
func (r *Runner) handlePlanQuestion(ctx context.Context, output string) (bool, error) {
9✔
1024
        question, err := parseQuestionPayload(output)
9✔
1025
        if err != nil {
15✔
1026
                // log malformed signals (but not "no signal" which is expected)
6✔
1027
                if !errors.Is(err, errNoQuestionSignal) {
6✔
1028
                        r.log.Print("warning: %v", err)
×
1029
                }
×
1030
                return false, nil
6✔
1031
        }
1032

1033
        r.log.LogQuestion(question.Question, question.Options)
3✔
1034

3✔
1035
        answer, askErr := r.inputCollector.AskQuestion(ctx, question.Question, question.Options)
3✔
1036
        if askErr != nil {
4✔
1037
                return true, fmt.Errorf("collect answer: %w", askErr)
1✔
1038
        }
1✔
1039

1040
        r.log.LogAnswer(answer)
2✔
1041
        return true, nil
2✔
1042
}
1043

1044
// runPlanCreation executes the interactive plan creation loop.
1045
// the loop continues until PLAN_READY signal or max iterations reached.
1046
// handles QUESTION signals for Q&A and PLAN_DRAFT signals for draft review.
1047
func (r *Runner) runPlanCreation(ctx context.Context) error {
18✔
1048
        if r.cfg.PlanDescription == "" {
19✔
1049
                return errors.New("plan description required for plan mode")
1✔
1050
        }
1✔
1051
        if r.inputCollector == nil {
18✔
1052
                return errors.New("input collector required for plan mode")
1✔
1053
        }
1✔
1054

1055
        r.phaseHolder.Set(status.PhasePlan)
16✔
1056
        r.log.PrintRaw("starting interactive plan creation\n")
16✔
1057
        r.log.Print("plan request: %s", r.cfg.PlanDescription)
16✔
1058

16✔
1059
        // plan iterations use 20% of max_iterations
16✔
1060
        maxPlanIterations := max(minPlanIterations, r.cfg.MaxIterations/planIterationDivisor)
16✔
1061

16✔
1062
        // track revision feedback for context in next iteration
16✔
1063
        var lastRevisionFeedback string
16✔
1064

16✔
1065
        for i := 1; i <= maxPlanIterations; i++ {
46✔
1066
                select {
30✔
1067
                case <-ctx.Done():
1✔
1068
                        return fmt.Errorf("plan creation: %w", ctx.Err())
1✔
1069
                default:
29✔
1070
                }
1071

1072
                r.log.PrintSection(status.NewPlanIterationSection(i))
29✔
1073

29✔
1074
                prompt := r.buildPlanPrompt()
29✔
1075
                // append revision feedback context if present
29✔
1076
                hadFeedback := lastRevisionFeedback != ""
29✔
1077
                if hadFeedback {
32✔
1078
                        prompt = fmt.Sprintf("%s\n\n---\nPREVIOUS DRAFT FEEDBACK:\nUser requested revisions with this feedback:\n%s\n\nPlease revise the plan accordingly and present a new PLAN_DRAFT.", prompt, lastRevisionFeedback)
3✔
1079
                }
3✔
1080

1081
                result := r.runWithLimitRetry(ctx, r.claude.Run, prompt, "claude")
29✔
1082
                if result.Error != nil {
31✔
1083
                        if err := r.handlePatternMatchError(result.Error, "claude"); err != nil {
3✔
1084
                                return err
1✔
1085
                        }
1✔
1086
                        return fmt.Errorf("claude execution: %w", result.Error)
1✔
1087
                }
1088

1089
                if result.Signal == SignalFailed {
28✔
1090
                        return errors.New("plan creation failed (FAILED signal received)")
1✔
1091
                }
1✔
1092

1093
                // check for PLAN_READY signal
1094
                if isPlanReady(result.Signal) {
34✔
1095
                        r.log.Print("plan creation completed")
8✔
1096
                        return nil
8✔
1097
                }
8✔
1098

1099
                // on session timeout, skip output parsing and retry; the session was killed before
1100
                // it could finish, so partial output may contain truncated PLAN_DRAFT or QUESTION markers.
1101
                // preserve lastRevisionFeedback so the next attempt re-sends the user's revision request
1102
                if r.lastSessionTimedOut {
20✔
1103
                        r.log.Print("plan creation session timed out, retrying iteration...")
2✔
1104
                        if err := r.sleepWithContext(ctx, r.iterationDelay); err != nil {
2✔
1105
                                return fmt.Errorf("interrupted: %w", err)
×
1106
                        }
×
1107
                        continue
2✔
1108
                }
1109

1110
                // session completed successfully, clear revision feedback since it was consumed
1111
                if hadFeedback {
17✔
1112
                        lastRevisionFeedback = ""
1✔
1113
                }
1✔
1114

1115
                // check for PLAN_DRAFT signal - present draft for user review
1116
                draftResult := r.handlePlanDraft(ctx, result.Output)
16✔
1117
                if draftResult.err != nil {
18✔
1118
                        return draftResult.err
2✔
1119
                }
2✔
1120
                if draftResult.handled {
19✔
1121
                        lastRevisionFeedback = draftResult.feedback
5✔
1122
                        if err := r.sleepWithContext(ctx, r.iterationDelay); err != nil {
5✔
1123
                                return fmt.Errorf("interrupted: %w", err)
×
1124
                        }
×
1125
                        continue
5✔
1126
                }
1127

1128
                // check for QUESTION signal
1129
                handled, err := r.handlePlanQuestion(ctx, result.Output)
9✔
1130
                if err != nil {
10✔
1131
                        return err
1✔
1132
                }
1✔
1133
                if handled {
10✔
1134
                        if err := r.sleepWithContext(ctx, r.iterationDelay); err != nil {
2✔
1135
                                return fmt.Errorf("interrupted: %w", err)
×
1136
                        }
×
1137
                        continue
2✔
1138
                }
1139

1140
                // no question, no draft, and no completion - continue
1141
                if err := r.sleepWithContext(ctx, r.iterationDelay); err != nil {
6✔
1142
                        return fmt.Errorf("interrupted: %w", err)
×
1143
                }
×
1144
        }
1145

1146
        return fmt.Errorf("max plan iterations (%d) reached without completion", maxPlanIterations)
1✔
1147
}
1148

1149
// handlePatternMatchError checks if err is a PatternMatchError or LimitPatternError and logs appropriate messages.
1150
// Returns the error if it's a pattern match (to trigger graceful exit), nil otherwise.
1151
func (r *Runner) handlePatternMatchError(err error, tool string) error {
10✔
1152
        var patternErr *executor.PatternMatchError
10✔
1153
        if errors.As(err, &patternErr) {
14✔
1154
                r.log.Print("error: detected %q in %s output", patternErr.Pattern, tool)
4✔
1155
                r.log.Print("run '%s' for more information", patternErr.HelpCmd)
4✔
1156
                return err
4✔
1157
        }
4✔
1158
        var limitErr *executor.LimitPatternError
6✔
1159
        if errors.As(err, &limitErr) {
8✔
1160
                r.log.Print("error: detected %q in %s output", limitErr.Pattern, tool)
2✔
1161
                r.log.Print("run '%s' for more information", limitErr.HelpCmd)
2✔
1162
                return err
2✔
1163
        }
2✔
1164
        return nil
4✔
1165
}
1166

1167
// runWithLimitRetry wraps an executor Run() call with rate limit retry logic and optional session timeout.
1168
// if the result contains a LimitPatternError and waitOnLimit > 0, it logs a message, waits, and retries.
1169
// if waitOnLimit == 0, the LimitPatternError is returned as-is (existing exit behavior).
1170
// other errors (including PatternMatchError) are returned without retry.
1171
// when SessionTimeout > 0, each run() call gets a child context with deadline.
1172
// on session timeout (child timed out but parent alive), logs a warning and returns result with error cleared.
1173
// retries indefinitely until success or context cancellation.
1174
func (r *Runner) runWithLimitRetry(ctx context.Context, run func(context.Context, string) executor.Result,
1175
        prompt, toolName string) executor.Result {
241✔
1176
        for {
487✔
1177
                result := r.runWithSessionTimeout(ctx, run, prompt, toolName)
246✔
1178
                if result.Error == nil {
468✔
1179
                        return result
222✔
1180
                }
222✔
1181

1182
                var limitErr *executor.LimitPatternError
24✔
1183
                if !errors.As(result.Error, &limitErr) {
39✔
1184
                        return result // not a limit error, return as-is
15✔
1185
                }
15✔
1186

1187
                if r.waitOnLimit <= 0 {
12✔
1188
                        return result // no wait configured, return limit error as-is
3✔
1189
                }
3✔
1190

1191
                r.log.Print("rate limit detected: %q in %s output, waiting %s before retry...",
6✔
1192
                        limitErr.Pattern, toolName, r.waitOnLimit)
6✔
1193

6✔
1194
                if err := r.sleepWithContext(ctx, r.waitOnLimit); err != nil {
7✔
1195
                        return executor.Result{Error: fmt.Errorf("interrupted during limit wait: %w", ctx.Err())}
1✔
1196
                }
1✔
1197
        }
1198
}
1199

1200
// runWithSessionTimeout runs the executor with an optional session timeout.
1201
// if SessionTimeout > 0 and toolName is "claude", wraps ctx with context.WithTimeout before calling run.
1202
// on session timeout (child timed out but parent alive), logs a warning and clears the error
1203
// so callers treat it as a non-completing iteration that continues naturally.
1204
// only applies to claude sessions; codex and custom executors are not affected.
1205
func (r *Runner) runWithSessionTimeout(ctx context.Context, run func(context.Context, string) executor.Result,
1206
        prompt, toolName string) executor.Result {
251✔
1207
        r.lastSessionTimedOut = false
251✔
1208
        sessionTimeout := r.sessionTimeout()
251✔
1209
        if sessionTimeout <= 0 || toolName != "claude" {
477✔
1210
                result := run(ctx, prompt) // no timeout configured or non-claude tool
226✔
1211
                // idle timeout without signal looks like "nothing to fix" to review loops;
226✔
1212
                // treat it like session timeout so they retry instead of exiting.
226✔
1213
                if result.IdleTimedOut && result.Signal == "" {
227✔
1214
                        r.log.Print("warning: %s session idle timed out, no output activity detected", toolName)
1✔
1215
                        r.lastSessionTimedOut = true
1✔
1216
                }
1✔
1217
                return result
226✔
1218
        }
1219

1220
        childCtx, cancel := context.WithTimeout(ctx, sessionTimeout)
25✔
1221
        defer cancel()
25✔
1222

25✔
1223
        result := run(childCtx, prompt)
25✔
1224

25✔
1225
        // check if this was a session timeout: child context expired but parent is still alive.
25✔
1226
        // clear the error so callers (task loop, review loop) treat it as a non-completing iteration
25✔
1227
        // rather than aborting the phase. set lastSessionTimedOut so review loops can distinguish
25✔
1228
        // timeout from "genuinely found nothing" and continue instead of exiting.
25✔
1229
        if childCtx.Err() != nil && ctx.Err() == nil {
36✔
1230
                r.log.Print("warning: %s session timed out after %s, the agent may have started a blocking operation",
11✔
1231
                        toolName, sessionTimeout)
11✔
1232
                result.Error = nil
11✔
1233
                result.Signal = "" // clear any signal emitted before timeout; can't trust partial session
11✔
1234
                r.lastSessionTimedOut = true
11✔
1235
        } else if result.IdleTimedOut && result.Signal == "" {
25✔
1236
                // idle timeout without signal: same treatment as session timeout for review loops
×
1237
                r.log.Print("warning: %s session idle timed out, no output activity detected", toolName)
×
1238
                r.lastSessionTimedOut = true
×
1239
        }
×
1240

1241
        return result
25✔
1242
}
1243

1244
// sessionTimeout returns the configured session timeout duration.
1245
// returns 0 if not configured or AppConfig is nil.
1246
func (r *Runner) sessionTimeout() time.Duration {
251✔
1247
        if r.cfg.AppConfig == nil {
251✔
1248
                return 0
×
1249
        }
×
1250
        return r.cfg.AppConfig.SessionTimeout
251✔
1251
}
1252

1253
// runFinalize executes the optional finalize step after successful reviews.
1254
// runs once, best-effort: failures are logged but don't block success.
1255
// exception: context cancellation is propagated (user wants to abort).
1256
func (r *Runner) runFinalize(ctx context.Context) error {
43✔
1257
        if !r.cfg.FinalizeEnabled {
76✔
1258
                return nil
33✔
1259
        }
33✔
1260

1261
        r.phaseHolder.Set(status.PhaseFinalize)
10✔
1262
        r.log.PrintSection(status.NewGenericSection("finalize step"))
10✔
1263

10✔
1264
        prompt := r.replacePromptVariables(r.cfg.AppConfig.FinalizePrompt)
10✔
1265
        result := r.runWithLimitRetry(ctx, r.reviewClaude.Run, prompt, "claude")
10✔
1266

10✔
1267
        if result.Error != nil {
13✔
1268
                // propagate context cancellation - user wants to abort
3✔
1269
                if errors.Is(result.Error, context.Canceled) || errors.Is(result.Error, context.DeadlineExceeded) {
4✔
1270
                        return fmt.Errorf("finalize step: %w", result.Error)
1✔
1271
                }
1✔
1272
                // pattern match (rate limit or error) - log via shared helper, but don't fail (best-effort)
1273
                if r.handlePatternMatchError(result.Error, "claude") != nil {
3✔
1274
                        return nil //nolint:nilerr // intentional: best-effort semantics, log but don't propagate
1✔
1275
                }
1✔
1276
                // best-effort: log error but don't fail
1277
                r.log.Print("finalize step failed: %v", result.Error)
1✔
1278
                return nil
1✔
1279
        }
1280

1281
        if result.Signal == SignalFailed {
8✔
1282
                r.log.Print("finalize step reported failure (non-blocking)")
1✔
1283
                return nil
1✔
1284
        }
1✔
1285

1286
        r.log.Print("finalize step completed")
6✔
1287
        return nil
6✔
1288
}
1289

1290
// sleepWithContext pauses for the given duration but returns immediately if context is canceled.
1291
// returns ctx.Err() on cancellation, nil on normal completion.
1292
func (r *Runner) sleepWithContext(ctx context.Context, d time.Duration) error {
51✔
1293
        t := time.NewTimer(d)
51✔
1294
        defer t.Stop()
51✔
1295
        select {
51✔
1296
        case <-t.C:
49✔
1297
                return nil
49✔
1298
        case <-ctx.Done():
2✔
1299
                return fmt.Errorf("sleep interrupted: %w", ctx.Err())
2✔
1300
        }
1301
}
1302

1303
// needsCodexBinary returns true if the current configuration requires the codex binary.
1304
// returns false when external_review_tool is "custom" or "none", since codex isn't used.
1305
func needsCodexBinary(appConfig *config.Config) bool {
3✔
1306
        if appConfig == nil {
3✔
1307
                return true // default behavior assumes codex
×
1308
        }
×
1309
        switch appConfig.ExternalReviewTool {
3✔
1310
        case "custom", "none":
2✔
1311
                return false
2✔
1312
        default:
1✔
1313
                return true // "codex" or empty (default) requires codex binary
1✔
1314
        }
1315
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc