• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

umputun / ralphex / 25178291348

30 Apr 2026 04:56PM UTC coverage: 82.616% (+0.07%) from 82.55%
25178291348

push

github

web-flow
feat: add provider override CLI flags (#314)

* gemini creates Agents.md automatically

* add plan: claude-command-external-review-tool-flags

* feat: add provider override cli flags

* feat: add provider override CLI tests

* feat: document provider override flags

* feat: verify provider flag acceptance criteria

* feat: update provider flag plan lifecycle

* fix: address code review findings

* refactor: drop underscore CLI aliases for provider override flags

Removes hidden --claude_command, --claude_args, --external_review_tool,
and --custom_review_script aliases. They were the only underscore-spelled
CLI flags in ralphex and didn't justify the alias fields, *Set bools,
conflict validation, and test matrix needed to support them. The hyphen
flags and the underscored config keys are unchanged.

* refactor: stop silently flipping codex_enabled on CLI external-review override

A CLI flag mutating an unrelated config field is surprising. Push the
explicitness signal down instead: processor.Config gains an
ExternalReviewToolSet flag, Runner.externalReviewTool() honors an
explicit choice over the legacy codex_enabled=false back-compat path,
and applyCLIOverrides only writes the field the user actually set.

Legacy behavior unchanged: a config with only codex_enabled=false (no
explicit external_review_tool from CLI) still resolves to "none".

29 of 29 new or added lines in 3 files covered. (100.0%)

6 existing lines in 3 files now uncovered.

6810 of 8243 relevant lines covered (82.62%)

220.92 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

89.69
/pkg/processor/runner.go
1
// Package processor provides the main orchestration loop for ralphex execution.
2
package processor
3

4
import (
5
        "context"
6
        "errors"
7
        "fmt"
8
        "os/exec"
9
        "strings"
10
        "time"
11

12
        "github.com/umputun/ralphex/pkg/config"
13
        "github.com/umputun/ralphex/pkg/executor"
14
        "github.com/umputun/ralphex/pkg/plan"
15
        "github.com/umputun/ralphex/pkg/status"
16
)
17

18
// DefaultIterationDelay is the pause between iterations to allow system to settle.
// NewWithExecutors uses it whenever Config.IterationDelayMs is not positive.
const DefaultIterationDelay = 2 * time.Second

// iteration budgets for the review, codex and plan phases.
// runClaudeReviewLoop computes its budget as max(minReviewIterations, MaxIterations/reviewIterationDivisor);
// the codex and plan pairs presumably follow the same min/divisor pattern in code outside this view.
const (
        minReviewIterations    = 3    // minimum claude review iterations
        reviewIterationDivisor = 10   // review iterations = max_iterations / divisor
        minCodexIterations     = 3    // minimum codex review iterations
        codexIterationDivisor  = 5    // codex iterations = max_iterations / divisor
        minPlanIterations      = 5    // minimum plan creation iterations
        planIterationDivisor   = 5    // plan iterations = max_iterations / divisor
        maxCodexSummaryLen     = 5000 // max chars for codex output summary
)
30

31
// Mode represents the execution mode.
// Runner.Run dispatches on this value and returns an "unknown mode" error for anything
// that is not one of the constants below.
type Mode string

// supported execution modes, one per Runner.Run branch.
const (
        ModeFull      Mode = "full"       // full execution: tasks + reviews + codex
        ModeReview    Mode = "review"     // skip tasks, run full review pipeline
        ModeCodexOnly Mode = "codex-only" // skip tasks and first review, run only codex loop
        ModeTasksOnly Mode = "tasks-only" // run only task phase, skip all reviews
        ModePlan      Mode = "plan"       // interactive plan creation mode
)
41

42
// Config holds runner configuration.
// it is passed by value to New / NewWithExecutors and stored on the Runner, so changes made
// by New (e.g. clearing CodexEnabled when the codex binary is missing) do not affect the caller's copy.
// New and NewWithExecutors tolerate a nil AppConfig, but the task and review phases read their
// prompts from AppConfig without a nil check.
type Config struct {
        PlanFile              string         // path to plan file (required for full mode)
        PlanDescription       string         // plan description for interactive plan creation mode
        ProgressPath          string         // path to progress file
        Mode                  Mode           // execution mode
        MaxIterations         int            // maximum iterations for task phase
        MaxExternalIterations int            // override external review iteration limit (0 = auto)
        ReviewPatience        int            // terminate external review after N unchanged rounds (0 = disabled)
        Debug                 bool           // enable debug output
        NoColor               bool           // disable color output
        IterationDelayMs      int            // delay between iterations in milliseconds
        TaskRetryCount        int            // number of times to retry failed tasks
        TaskModel             string         // model[:effort] spec for task execution; parsed via ParseModelEffort (empty = CLI defaults)
        ReviewModel           string         // model[:effort] spec for review phases; empty falls back to TaskModel
        CodexEnabled          bool           // whether codex review is enabled
        ExternalReviewToolSet bool           // when true, AppConfig.ExternalReviewTool is an explicit choice that overrides legacy codex_enabled=false back-compat
        FinalizeEnabled       bool           // whether finalize step is enabled
        DefaultBranch         string         // default branch name (detected from repo)
        AppConfig             *config.Config // full application config (for executors and prompts)
}
63

64
//go:generate moq -out mocks/executor.go -pkg mocks -skip-ensure -fmt goimports . Executor
65
//go:generate moq -out mocks/logger.go -pkg mocks -skip-ensure -fmt goimports . Logger
66
//go:generate moq -out mocks/input_collector.go -pkg mocks -skip-ensure -fmt goimports . InputCollector
67
//go:generate moq -out mocks/git_checker.go -pkg mocks -skip-ensure -fmt goimports . GitChecker
68

69
// Executor runs CLI commands and returns results.
// the runner invokes Run with a fully expanded prompt and then inspects the Error and
// Signal fields of the returned executor.Result to decide how the phase proceeds.
type Executor interface {
        Run(ctx context.Context, prompt string) executor.Result
}
73

74
// Logger provides logging functionality.
// the runner uses Print/PrintRaw for progress messages, PrintSection to mark phase and
// iteration boundaries, and PrintAligned as the output sink for the executors built in New.
type Logger interface {
        // Print and PrintRaw take a printf-style format and arguments.
        Print(format string, args ...any)
        PrintRaw(format string, args ...any)
        PrintSection(section status.Section)
        PrintAligned(text string)
        // the Log* methods and Path are not called in the part of the file visible here.
        LogQuestion(question string, options []string)
        LogAnswer(answer string)
        LogDraftReview(action string, feedback string)
        Path() string
}
85

86
// InputCollector provides interactive input collection for plan creation.
// it is attached to a Runner via SetInputCollector; both methods take a context and report
// failures through their error result.
type InputCollector interface {
        AskQuestion(ctx context.Context, question string, options []string) (string, error)
        AskDraftReview(ctx context.Context, question string, planContent string) (action string, feedback string, err error)
}
91

92
// GitChecker provides git state inspection for the review loop.
// it is optional: with no checker set, Runner.headHash and Runner.diffFingerprint return an
// empty string, and they also degrade to an empty string (with a warning) when a call fails.
type GitChecker interface {
        HeadHash() (string, error)
        DiffFingerprint() (string, error)
}
97

98
// Executors groups the executor dependencies for the Runner.
// NewWithExecutors copies these onto the Runner as-is, except that a nil ReviewClaude
// is replaced by Claude.
type Executors struct {
        Claude       Executor
        ReviewClaude Executor // optional: separate executor for review phases (nil = use Claude)
        Codex        Executor
        Custom       *executor.CustomExecutor
}
105

106
// Runner orchestrates the execution loop.
// construct it with New or NewWithExecutors; git, inputCollector, breakCh and pauseHandler
// are optional and are attached afterwards through the corresponding Set* methods.
type Runner struct {
        cfg                 Config
        log                 Logger
        claude              Executor // executor for task phase
        reviewClaude        Executor // executor for review phases (may differ in model)
        codex               Executor
        custom              *executor.CustomExecutor
        git                 GitChecker
        inputCollector      InputCollector
        phaseHolder         *status.PhaseHolder
        iterationDelay      time.Duration
        taskRetryCount      int
        waitOnLimit         time.Duration
        breakCh             <-chan struct{}                 // nil = feature disabled; receives one value per break signal
        pauseHandler        func(ctx context.Context) bool  // called on break during task phase; true = resume, false = abort
        lastSessionTimedOut bool                            // set by runWithSessionTimeout, checked by review loops
        taskPhaseOverride   func(ctx context.Context) error // test seam: override runTaskPhase result (nil = normal execution)
}
125

126
// New creates a new Runner with the given configuration and shared phase holder.
127
// If codex is enabled but the binary is not found in PATH, it is automatically disabled with a warning.
128
func New(cfg Config, log Logger, holder *status.PhaseHolder) *Runner {
10✔
129
        // build claude executor with config values
10✔
130
        claudeExec := &executor.ClaudeExecutor{
10✔
131
                OutputHandler: func(text string) {
10✔
132
                        log.PrintAligned(text)
×
133
                },
×
134
                Debug: cfg.Debug,
135
        }
136
        if cfg.AppConfig != nil {
20✔
137
                claudeExec.Command = cfg.AppConfig.ClaudeCommand
10✔
138
                claudeExec.Args = cfg.AppConfig.ClaudeArgs
10✔
139
                claudeExec.ArgsSet = cfg.AppConfig.ClaudeArgsSet
10✔
140
                claudeExec.ErrorPatterns = cfg.AppConfig.ClaudeErrorPatterns
10✔
141
                claudeExec.LimitPatterns = cfg.AppConfig.ClaudeLimitPatterns
10✔
142
                claudeExec.IdleTimeout = cfg.AppConfig.IdleTimeout
10✔
143
        }
10✔
144
        taskModel, taskEffort := ParseModelEffort(cfg.TaskModel)
10✔
145
        claudeExec.Model, claudeExec.Effort = taskModel, taskEffort
10✔
146

10✔
147
        // build review executor (shares base config, may use a different model or effort).
10✔
148
        // compare parsed tuples rather than raw strings so equivalent specs like "opus" and
10✔
149
        // "opus:" don't produce a redundant second executor.
10✔
150
        reviewSpec := cfg.ReviewModel
10✔
151
        if reviewSpec == "" {
17✔
152
                reviewSpec = cfg.TaskModel // fall back to task model spec
7✔
153
        }
7✔
154
        reviewModel, reviewEffort := ParseModelEffort(reviewSpec)
10✔
155
        var reviewExec Executor
10✔
156
        if reviewModel != taskModel || reviewEffort != taskEffort {
12✔
157
                re := &executor.ClaudeExecutor{
2✔
158
                        OutputHandler: claudeExec.OutputHandler,
2✔
159
                        Debug:         cfg.Debug,
2✔
160
                        Model:         reviewModel,
2✔
161
                        Effort:        reviewEffort,
2✔
162
                }
2✔
163
                if cfg.AppConfig != nil {
4✔
164
                        re.Command = cfg.AppConfig.ClaudeCommand
2✔
165
                        re.Args = cfg.AppConfig.ClaudeArgs
2✔
166
                        re.ArgsSet = cfg.AppConfig.ClaudeArgsSet
2✔
167
                        re.ErrorPatterns = cfg.AppConfig.ClaudeErrorPatterns
2✔
168
                        re.LimitPatterns = cfg.AppConfig.ClaudeLimitPatterns
2✔
169
                        re.IdleTimeout = cfg.AppConfig.IdleTimeout
2✔
170
                }
2✔
171
                reviewExec = re
2✔
172
        }
173

174
        // build codex executor with config values
175
        codexExec := &executor.CodexExecutor{
10✔
176
                OutputHandler: func(text string) {
10✔
177
                        log.PrintAligned(text)
×
178
                },
×
179
                Debug: cfg.Debug,
180
        }
181
        if cfg.AppConfig != nil {
20✔
182
                codexExec.Command = cfg.AppConfig.CodexCommand
10✔
183
                codexExec.Model = cfg.AppConfig.CodexModel
10✔
184
                codexExec.ReasoningEffort = cfg.AppConfig.CodexReasoningEffort
10✔
185
                codexExec.TimeoutMs = cfg.AppConfig.CodexTimeoutMs
10✔
186
                codexExec.Sandbox = cfg.AppConfig.CodexSandbox
10✔
187
                codexExec.ErrorPatterns = cfg.AppConfig.CodexErrorPatterns
10✔
188
                codexExec.LimitPatterns = cfg.AppConfig.CodexLimitPatterns
10✔
189
        }
10✔
190

191
        // build custom executor if custom review script is configured
192
        var customExec *executor.CustomExecutor
10✔
193
        if cfg.AppConfig != nil && cfg.AppConfig.CustomReviewScript != "" {
11✔
194
                customExec = &executor.CustomExecutor{
1✔
195
                        Script: cfg.AppConfig.CustomReviewScript,
1✔
196
                        OutputHandler: func(text string) {
1✔
197
                                log.PrintAligned(text)
×
198
                        },
×
199
                        ErrorPatterns: cfg.AppConfig.CodexErrorPatterns, // reuse codex error patterns
200
                        LimitPatterns: cfg.AppConfig.CodexLimitPatterns, // reuse codex limit patterns
201
                }
202
        }
203

204
        // auto-disable codex if the binary is not installed AND we need codex
205
        // (skip this check if using custom external review tool or external review is disabled)
206
        if cfg.CodexEnabled && needsCodexBinary(cfg.AppConfig) {
11✔
207
                codexCmd := codexExec.Command
1✔
208
                if codexCmd == "" {
1✔
209
                        codexCmd = "codex"
×
210
                }
×
211
                if _, err := exec.LookPath(codexCmd); err != nil {
2✔
212
                        log.Print("warning: codex not found (%s: %v), disabling codex review phase", codexCmd, err)
1✔
213
                        cfg.CodexEnabled = false
1✔
214
                }
1✔
215
        }
216

217
        return NewWithExecutors(cfg, log, Executors{Claude: claudeExec, ReviewClaude: reviewExec, Codex: codexExec, Custom: customExec}, holder)
10✔
218
}
219

220
// NewWithExecutors creates a new Runner with custom executors (for testing).
221
func NewWithExecutors(cfg Config, log Logger, execs Executors, holder *status.PhaseHolder) *Runner {
156✔
222
        // determine iteration delay from config or default
156✔
223
        iterDelay := DefaultIterationDelay
156✔
224
        if cfg.IterationDelayMs > 0 {
201✔
225
                iterDelay = time.Duration(cfg.IterationDelayMs) * time.Millisecond
45✔
226
        }
45✔
227

228
        // determine task retry count from config
229
        // appConfig.TaskRetryCountSet means user explicitly set it (even to 0 for no retries)
230
        retryCount := 1
156✔
231
        if cfg.AppConfig != nil && cfg.AppConfig.TaskRetryCountSet {
276✔
232
                retryCount = cfg.TaskRetryCount
120✔
233
        } else if cfg.TaskRetryCount > 0 {
157✔
234
                retryCount = cfg.TaskRetryCount
1✔
235
        }
1✔
236

237
        // determine wait-on-limit duration from config
238
        var waitOnLimit time.Duration
156✔
239
        if cfg.AppConfig != nil {
276✔
240
                waitOnLimit = cfg.AppConfig.WaitOnLimit
120✔
241
        }
120✔
242

243
        // if no separate review executor, use the same as task executor
244
        reviewClaude := execs.ReviewClaude
156✔
245
        if reviewClaude == nil {
309✔
246
                reviewClaude = execs.Claude
153✔
247
        }
153✔
248

249
        return &Runner{
156✔
250
                cfg:            cfg,
156✔
251
                log:            log,
156✔
252
                claude:         execs.Claude,
156✔
253
                reviewClaude:   reviewClaude,
156✔
254
                codex:          execs.Codex,
156✔
255
                custom:         execs.Custom,
156✔
256
                phaseHolder:    holder,
156✔
257
                iterationDelay: iterDelay,
156✔
258
                taskRetryCount: retryCount,
156✔
259
                waitOnLimit:    waitOnLimit,
156✔
260
        }
156✔
261
}
262

263
// SetInputCollector sets the input collector for plan creation mode.
// it only stores c on the runner, replacing any previously set collector.
func (r *Runner) SetInputCollector(c InputCollector) {
        r.inputCollector = c
}
17✔
267

268
// SetGitChecker sets the git checker for no-commit detection in review loops.
// without a checker, headHash and diffFingerprint return an empty string.
func (r *Runner) SetGitChecker(g GitChecker) {
        r.git = g
}
8✔
272

273
// SetBreakCh sets the break channel for manual termination of review and task loops.
// each value sent on the channel triggers one break event (repeatable, not close-based).
// a nil channel leaves the break feature disabled.
func (r *Runner) SetBreakCh(ch <-chan struct{}) {
        r.breakCh = ch
}
7✔
278

279
// SetPauseHandler sets the callback invoked when a break signal is received during task iteration.
// the handler should prompt the user and return true to resume or false to abort.
// if nil, break during task phase returns ErrUserAborted immediately.
// the handler receives the context passed to the task phase, not the per-iteration break context.
func (r *Runner) SetPauseHandler(fn func(ctx context.Context) bool) {
        r.pauseHandler = fn
}
3✔
285

286
// Run executes the main loop based on configured mode.
287
func (r *Runner) Run(ctx context.Context) error {
96✔
288
        switch r.cfg.Mode {
96✔
289
        case ModeFull:
17✔
290
                return r.runFull(ctx)
17✔
291
        case ModeReview:
18✔
292
                return r.runReviewOnly(ctx)
18✔
293
        case ModeCodexOnly:
30✔
294
                return r.runCodexOnly(ctx)
30✔
295
        case ModeTasksOnly:
12✔
296
                return r.runTasksOnly(ctx)
12✔
297
        case ModePlan:
18✔
298
                return r.runPlanCreation(ctx)
18✔
299
        default:
1✔
300
                return fmt.Errorf("unknown mode: %s", r.cfg.Mode)
1✔
301
        }
302
}
303

304
// runFull executes the complete pipeline: tasks → review → codex → review.
305
func (r *Runner) runFull(ctx context.Context) error {
17✔
306
        if r.cfg.PlanFile == "" {
18✔
307
                return errors.New("plan file required for full mode")
1✔
308
        }
1✔
309
        if err := r.validatePlanHasTasks(); err != nil {
17✔
310
                return err
1✔
311
        }
1✔
312

313
        // phase 1: task execution
314
        r.phaseHolder.Set(status.PhaseTask)
15✔
315
        r.log.PrintRaw("starting task execution phase\n")
15✔
316

15✔
317
        if err := r.runTaskPhase(ctx); err != nil {
24✔
318
                if errors.Is(err, ErrUserAborted) {
10✔
319
                        r.log.Print("task phase aborted by user")
1✔
320
                        return ErrUserAborted
1✔
321
                }
1✔
322
                return fmt.Errorf("task phase: %w", err)
8✔
323
        }
324

325
        // phase 2: first review pass - address ALL findings
326
        r.phaseHolder.Set(status.PhaseReview)
6✔
327
        r.log.PrintSection(status.NewGenericSection("claude review 0: all findings"))
6✔
328

6✔
329
        if err := r.runClaudeReview(ctx, r.replacePromptVariables(r.cfg.AppConfig.ReviewFirstPrompt)); err != nil {
6✔
330
                return fmt.Errorf("first review: %w", err)
×
331
        }
×
332

333
        // phase 2.1: claude review loop (critical/major) before codex
334
        if err := r.runClaudeReviewLoop(ctx); err != nil {
6✔
335
                return fmt.Errorf("pre-codex review loop: %w", err)
×
336
        }
×
337

338
        // phase 2.5+3: codex → post-codex review → finalize
339
        if err := r.runCodexAndPostReview(ctx); err != nil {
6✔
340
                return err
×
341
        }
×
342

343
        r.log.Print("all phases completed successfully")
6✔
344
        return nil
6✔
345
}
346

347
// runReviewOnly executes only the review pipeline: review → codex → review.
348
func (r *Runner) runReviewOnly(ctx context.Context) error {
18✔
349
        // phase 1: first review
18✔
350
        r.phaseHolder.Set(status.PhaseReview)
18✔
351
        r.log.PrintSection(status.NewGenericSection("claude review 0: all findings"))
18✔
352

18✔
353
        if err := r.runClaudeReview(ctx, r.replacePromptVariables(r.cfg.AppConfig.ReviewFirstPrompt)); err != nil {
19✔
354
                return fmt.Errorf("first review: %w", err)
1✔
355
        }
1✔
356

357
        // phase 1.1: claude review loop (critical/major) before codex
358
        if err := r.runClaudeReviewLoop(ctx); err != nil {
18✔
359
                return fmt.Errorf("pre-codex review loop: %w", err)
1✔
360
        }
1✔
361

362
        // phase 2+3: codex → post-codex review → finalize
363
        if err := r.runCodexAndPostReview(ctx); err != nil {
19✔
364
                return err
3✔
365
        }
3✔
366

367
        r.log.Print("review phases completed successfully")
13✔
368
        return nil
13✔
369
}
370

371
// runCodexOnly executes only the codex pipeline: codex → review → finalize.
372
func (r *Runner) runCodexOnly(ctx context.Context) error {
30✔
373
        if err := r.runCodexAndPostReview(ctx); err != nil {
31✔
374
                return err
1✔
375
        }
1✔
376

377
        r.log.Print("codex phases completed successfully")
29✔
378
        return nil
29✔
379
}
380

381
// runCodexAndPostReview runs the shared codex → post-codex claude review → finalize pipeline.
382
// used by runFull, runReviewOnly, and runCodexOnly to avoid duplicating this sequence.
383
func (r *Runner) runCodexAndPostReview(ctx context.Context) error {
52✔
384
        // codex external review loop
52✔
385
        r.phaseHolder.Set(status.PhaseCodex)
52✔
386
        r.log.PrintSection(status.NewGenericSection("codex external review"))
52✔
387

52✔
388
        hadFindings, err := r.runCodexLoop(ctx)
52✔
389
        if err != nil {
55✔
390
                return fmt.Errorf("codex loop: %w", err)
3✔
391
        }
3✔
392

393
        // skip post-codex claude review when external review found nothing on the first pass.
394
        // the purpose of this review is to catch regressions from fixes applied during the external
395
        // review loop — if no findings were reported, no fixes were made and there's nothing to regress.
396
        if !hadFindings {
82✔
397
                r.log.Print("external review found no issues, skipping post-codex claude review")
33✔
398
                return r.runFinalize(ctx)
33✔
399
        }
33✔
400

401
        // claude review loop (critical/major) after codex.
402
        // prepend commit-pending instruction only when external review actually ran,
403
        // because the loop may exit early (max iterations, stalemate, manual break)
404
        // leaving uncommitted fixes in the worktree.
405
        r.phaseHolder.Set(status.PhaseReview)
16✔
406

16✔
407
        var commitPrefix string
16✔
408
        if r.externalReviewTool() != "none" {
32✔
409
                commitPrefix = "IMPORTANT: Before starting the review, run `git status`. " +
16✔
410
                        "If there are uncommitted changes from previous review phases, " +
16✔
411
                        "stage and commit them with message: " +
16✔
412
                        "`fix: address code review findings`\n" +
16✔
413
                        "Then continue with the sequence below.\n\n"
16✔
414
        }
16✔
415
        if err := r.runClaudeReviewLoop(ctx, commitPrefix); err != nil {
16✔
416
                return fmt.Errorf("post-codex review loop: %w", err)
×
417
        }
×
418

419
        // optional finalize step (best-effort, but propagates context cancellation)
420
        return r.runFinalize(ctx)
16✔
421
}
422

423
// runTasksOnly executes only task phase, skipping all reviews.
424
func (r *Runner) runTasksOnly(ctx context.Context) error {
12✔
425
        if r.cfg.PlanFile == "" {
13✔
426
                return errors.New("plan file required for tasks-only mode")
1✔
427
        }
1✔
428
        if err := r.validatePlanHasTasks(); err != nil {
12✔
429
                return err
1✔
430
        }
1✔
431

432
        r.phaseHolder.Set(status.PhaseTask)
10✔
433
        r.log.PrintRaw("starting task execution phase\n")
10✔
434

10✔
435
        if err := r.runTaskPhase(ctx); err != nil {
14✔
436
                if errors.Is(err, ErrUserAborted) {
7✔
437
                        r.log.Print("task phase aborted by user")
3✔
438
                        return ErrUserAborted
3✔
439
                }
3✔
440
                return fmt.Errorf("task phase: %w", err)
1✔
441
        }
442

443
        r.log.Print("task execution completed successfully")
6✔
444
        return nil
6✔
445
}
446

447
// runTaskPhase executes tasks until completion or max iterations.
// executes ONE Task section per iteration. supports break (Ctrl+\) with pause+resume:
// on break, the current session is canceled, pauseHandler is called, and on resume
// the same iteration re-runs with a fresh session that re-reads the plan file.
//
// returns nil once a completion signal arrives and the plan has no uncompleted tasks.
// returns ErrUserAborted when a break is not resumed, and an error when the context is done,
// the executor fails, a FAILED signal exhausts the retry budget, or MaxIterations is reached.
func (r *Runner) runTaskPhase(ctx context.Context) error {
        // test seam: bypass the real loop entirely when an override is installed
        if r.taskPhaseOverride != nil {
                return r.taskPhaseOverride(ctx)
        }
        // the prompt is expanded once and reused for every iteration
        prompt := r.replacePromptVariables(r.cfg.AppConfig.TaskPrompt)
        retryCount := 0

        for i := 1; i <= r.cfg.MaxIterations; i++ {
                // non-blocking cancellation check before starting a new session
                select {
                case <-ctx.Done():
                        return fmt.Errorf("task phase: %w", ctx.Err())
                default:
                }

                // use plan task position instead of loop counter for correct dashboard highlighting
                taskNum := i
                if pos := r.nextPlanTaskPosition(); pos > 0 {
                        taskNum = pos
                }
                r.log.PrintSection(status.NewTaskIterationSection(taskNum))

                // create per-iteration break context so Ctrl+\ cancels only the current session
                loopCtx, loopCancel := r.breakContext(ctx)

                result := r.runWithLimitRetry(loopCtx, r.claude.Run, prompt, "claude")

                // check break before calling loopCancel — cancel would make loopCtx.Err() non-nil
                manualBreak := r.isBreak(loopCtx, ctx)
                loopCancel()

                if manualBreak {
                        r.log.Print("session interrupted by break signal")
                        r.drainBreakCh() // clear signal that may have arrived during cancellation
                        // no handler, or handler declined to resume: abort the whole phase
                        if r.pauseHandler == nil || !r.pauseHandler(ctx) {
                                return ErrUserAborted
                        }
                        // resume: decrement i to preserve iteration budget and re-run same task
                        r.drainBreakCh() // clear any signal received during pause prompt
                        i--
                        retryCount = 0
                        continue
                }

                if result.Error != nil {
                        // a non-nil result from handlePatternMatchError takes precedence over the generic wrap
                        if err := r.handlePatternMatchError(result.Error, "claude"); err != nil {
                                return err
                        }
                        return fmt.Errorf("claude execution: %w", result.Error)
                }

                if result.Signal == SignalCompleted {
                        // verify plan actually has no uncompleted checkboxes
                        if r.hasUncompletedTasks() {
                                r.log.Print("warning: completion signal received but plan still has [ ] items, continuing...")
                                // note: this path skips the inter-iteration delay and leaves retryCount untouched
                                continue
                        }
                        r.log.PrintRaw("\nall tasks completed, starting code review...\n")
                        return nil
                }

                if result.Signal == SignalFailed {
                        // retries consume loop iterations as well as the retry budget
                        if retryCount < r.taskRetryCount {
                                r.log.Print("task failed, retrying...")
                                retryCount++
                                if err := r.sleepWithContext(ctx, r.iterationDelay); err != nil {
                                        return fmt.Errorf("interrupted: %w", err)
                                }
                                continue
                        }
                        return errors.New("task execution failed after retry (FAILED signal received)")
                }

                // any other signal: reset the retry budget for the next iteration
                retryCount = 0
                // continue with same prompt - it reads from plan file each time
                if err := r.sleepWithContext(ctx, r.iterationDelay); err != nil {
                        return fmt.Errorf("interrupted: %w", err)
                }
        }

        return fmt.Errorf("max iterations (%d) reached without completion", r.cfg.MaxIterations)
}
532

533
// runClaudeReview runs Claude review with the given prompt until REVIEW_DONE.
534
func (r *Runner) runClaudeReview(ctx context.Context, prompt string) error {
24✔
535
        result := r.runWithLimitRetry(ctx, r.reviewClaude.Run, prompt, "claude")
24✔
536
        if result.Error != nil {
24✔
537
                if err := r.handlePatternMatchError(result.Error, "claude"); err != nil {
×
538
                        return err
×
539
                }
×
540
                return fmt.Errorf("claude execution: %w", result.Error)
×
541
        }
542

543
        if result.Signal == SignalFailed {
25✔
544
                return errors.New("review failed (FAILED signal received)")
1✔
545
        }
1✔
546

547
        if !isReviewDone(result.Signal) {
24✔
548
                r.log.Print("warning: first review pass did not complete cleanly, continuing...")
1✔
549
        }
1✔
550

551
        return nil
23✔
552
}
553

554
// runClaudeReviewLoop runs claude review iterations using second review prompt.
// optional promptPrefix is prepended to the review prompt (used for commit-pending instruction after codex).
// only the first promptPrefix element is used; additional elements are ignored.
//
// the loop ends successfully when a review-done signal arrives, when HEAD is unchanged after
// an iteration (no commit made), or when the iteration budget runs out. it returns an error on
// context cancellation, executor failure, a FAILED signal, or an interrupted delay.
func (r *Runner) runClaudeReviewLoop(ctx context.Context, promptPrefix ...string) error {
        // review iterations = 10% of max_iterations
        maxReviewIterations := max(minReviewIterations, r.cfg.MaxIterations/reviewIterationDivisor)

        prefix := ""
        if len(promptPrefix) > 0 {
                prefix = promptPrefix[0]
        }

        for i := 1; i <= maxReviewIterations; i++ {
                // non-blocking cancellation check before starting a new session
                select {
                case <-ctx.Done():
                        return fmt.Errorf("review: %w", ctx.Err())
                default:
                }

                r.log.PrintSection(status.NewClaudeReviewSection(i, ": critical/major"))

                // capture HEAD hash before running claude for no-commit detection
                headBefore := r.headHash()

                // prompt variables are re-expanded on every iteration
                result := r.runWithLimitRetry(ctx, r.reviewClaude.Run,
                        prefix+r.replacePromptVariables(r.cfg.AppConfig.ReviewSecondPrompt), "claude")
                if result.Error != nil {
                        // a non-nil result from handlePatternMatchError takes precedence over the generic wrap
                        if err := r.handlePatternMatchError(result.Error, "claude"); err != nil {
                                return err
                        }
                        return fmt.Errorf("claude execution: %w", result.Error)
                }

                if result.Signal == SignalFailed {
                        return errors.New("review failed (FAILED signal received)")
                }

                if isReviewDone(result.Signal) {
                        r.log.Print("claude review complete - no more findings")
                        return nil
                }

                // on session timeout, skip HEAD check and retry; the session was killed before
                // it could finish, so "no changes" doesn't mean "nothing to fix"
                if r.lastSessionTimedOut {
                        r.log.Print("session timed out, retrying review iteration...")
                        // note: the retry still consumes one iteration and skips the delay below
                        continue
                }

                // fallback: if HEAD hash hasn't changed, claude found nothing to fix
                // (skipped when headBefore is empty, i.e. no git checker or HeadHash failed)
                if headBefore != "" {
                        if headAfter := r.headHash(); headAfter == headBefore {
                                r.log.Print("claude review complete - no changes detected")
                                return nil
                        }
                }

                r.log.Print("issues fixed, running another review iteration...")
                if err := r.sleepWithContext(ctx, r.iterationDelay); err != nil {
                        return fmt.Errorf("interrupted: %w", err)
                }
        }

        // running out of iterations is not treated as a failure
        r.log.Print("max claude review iterations reached, continuing...")
        return nil
}
619

620
// headHash returns the current HEAD commit hash, or empty string if unavailable.
621
func (r *Runner) headHash() string {
68✔
622
        if r.git == nil {
107✔
623
                return ""
39✔
624
        }
39✔
625
        hash, err := r.git.HeadHash()
29✔
626
        if err != nil {
32✔
627
                r.log.Print("warning: failed to get HEAD hash: %v", err)
3✔
628
                return ""
3✔
629
        }
3✔
630
        return hash
26✔
631
}
632

633
// diffFingerprint returns a hash of the current working tree diff, or empty string if unavailable.
634
func (r *Runner) diffFingerprint() string {
20✔
635
        if r.git == nil {
23✔
636
                return ""
3✔
637
        }
3✔
638
        fp, err := r.git.DiffFingerprint()
17✔
639
        if err != nil {
17✔
640
                r.log.Print("warning: failed to get diff fingerprint: %v", err)
×
641
                return ""
×
642
        }
×
643
        return fp
17✔
644
}
645

646
// checkStalemate compares git state before and after claude evaluation to detect unchanged rounds.
647
// returns the updated unchanged round counter: incremented if no changes detected, reset to 0 otherwise.
648
// when diff fingerprints are unavailable (error), falls back to HEAD-only comparison.
649
func (r *Runner) checkStalemate(headBefore, headAfter, diffBefore, diffAfter string, unchangedRounds int) int {
6✔
650
        unchanged := headAfter == headBefore
6✔
651
        if diffBefore != "" && diffAfter != "" {
12✔
652
                unchanged = unchanged && diffAfter == diffBefore
6✔
653
        }
6✔
654
        if unchanged {
10✔
655
                return unchangedRounds + 1
4✔
656
        }
4✔
657
        return 0
2✔
658
}
659

660
// updateStalemate checks if review patience is enabled, computes the "after" git state,
661
// and returns the updated unchanged-rounds counter plus a flag indicating stalemate.
662
// skips the update if "after" values are empty (transient git error) to avoid resetting the counter.
663
func (r *Runner) updateStalemate(headBefore, diffBefore string, unchangedRounds int) (int, bool) {
26✔
664
        if r.cfg.ReviewPatience <= 0 || headBefore == "" {
46✔
665
                return unchangedRounds, false
20✔
666
        }
20✔
667
        // skip stalemate update if "after" values are empty (transient git error),
668
        // so errors don't reset unchangedRounds and inadvertently disable early exit
669
        if headAfter, diffAfter := r.headHash(), r.diffFingerprint(); headAfter != "" && diffAfter != "" {
12✔
670
                unchangedRounds = r.checkStalemate(headBefore, headAfter, diffBefore, diffAfter, unchangedRounds)
6✔
671
        }
6✔
672
        if unchangedRounds >= r.cfg.ReviewPatience {
7✔
673
                r.log.Print("stalemate detected after %d unchanged rounds, external review terminated early", unchangedRounds)
1✔
674
                return unchangedRounds, true
1✔
675
        }
1✔
676
        return unchangedRounds, false
5✔
677
}
678

679
// externalReviewTool returns the effective external review tool to use.
680
// an explicit ExternalReviewTool choice (e.g. via --external-review-tool) wins
681
// over legacy codex_enabled=false back-compat; otherwise codex_enabled=false
682
// is treated as "none" so users with only that legacy setting still skip
683
// external review.
684
func (r *Runner) externalReviewTool() string {
68✔
685
        if r.cfg.ExternalReviewToolSet && r.cfg.AppConfig != nil && r.cfg.AppConfig.ExternalReviewTool != "" {
69✔
686
                return r.cfg.AppConfig.ExternalReviewTool
1✔
687
        }
1✔
688

689
        if !r.cfg.CodexEnabled {
87✔
690
                return "none"
20✔
691
        }
20✔
692

693
        if r.cfg.AppConfig != nil && r.cfg.AppConfig.ExternalReviewTool != "" {
94✔
694
                return r.cfg.AppConfig.ExternalReviewTool
47✔
695
        }
47✔
696

UNCOV
697
        return "codex"
×
698
}
699

700
// runCodexLoop runs the external review loop (codex or custom) until no findings.
701
func (r *Runner) runCodexLoop(ctx context.Context) (bool, error) {
52✔
702
        tool := r.externalReviewTool()
52✔
703

52✔
704
        // skip external review phase if disabled
52✔
705
        if tool == "none" {
73✔
706
                r.log.Print("external review disabled, skipping...")
21✔
707
                return false, nil
21✔
708
        }
21✔
709

710
        // custom review tool
711
        if tool == "custom" {
34✔
712
                if r.custom == nil {
4✔
713
                        return false, errors.New("custom review script not configured")
1✔
714
                }
1✔
715
                return r.runExternalReviewLoop(ctx, externalReviewConfig{
2✔
716
                        name:            "custom",
2✔
717
                        runReview:       func(ctx context.Context, prompt string) executor.Result { return r.custom.Run(ctx, prompt) },
4✔
718
                        buildPrompt:     r.buildCustomReviewPrompt,
719
                        buildEvalPrompt: r.buildCustomEvaluationPrompt,
720
                        showSummary:     func(string) {}, // no-op: custom output already streamed via OutputHandler
2✔
721
                        makeSection:     status.NewCustomIterationSection,
722
                })
723
        }
724

725
        // default: codex review
726
        return r.runExternalReviewLoop(ctx, externalReviewConfig{
28✔
727
                name:            "codex",
28✔
728
                runReview:       r.codex.Run,
28✔
729
                buildPrompt:     r.buildCodexPrompt,
28✔
730
                buildEvalPrompt: r.buildCodexEvaluationPrompt,
28✔
731
                showSummary:     r.showCodexSummary,
28✔
732
                makeSection:     status.NewCodexIterationSection,
28✔
733
        })
28✔
734
}
735

736
// externalReviewConfig holds callbacks for running an external review tool.
737
type externalReviewConfig struct {
738
        name            string                                                   // tool name for error messages
739
        runReview       func(ctx context.Context, prompt string) executor.Result // run the external review tool
740
        buildPrompt     func(isFirst bool, claudeResponse string) string         // build prompt for review tool
741
        buildEvalPrompt func(output string) string                               // build evaluation prompt for claude
742
        showSummary     func(output string)                                      // display review findings summary
743
        makeSection     func(iteration int) status.Section                       // create section header
744
}
745

746
// runExternalReviewLoop runs a generic external review tool-claude loop.
747
// it terminates when no findings remain, max iterations are reached,
748
// stalemate is detected (review patience), or a manual break is requested.
749
// returns true if findings were found, meaning claude evaluated external review output
750
// and did not signal CodexDone (i.e., there were actionable issues requiring fixes).
751
func (r *Runner) runExternalReviewLoop(ctx context.Context, cfg externalReviewConfig) (bool, error) {
30✔
752
        maxIterations := max(minCodexIterations, r.cfg.MaxIterations/codexIterationDivisor)
30✔
753
        if r.cfg.MaxExternalIterations > 0 {
35✔
754
                maxIterations = r.cfg.MaxExternalIterations
5✔
755
        }
5✔
756

757
        // derive a child context that cancels when break channel fires
758
        loopCtx, loopCancel := r.breakContext(ctx)
30✔
759
        defer loopCancel()
30✔
760

30✔
761
        var claudeResponse string // first iteration has no prior response
30✔
762
        var unchangedRounds int   // consecutive iterations with no commits (for stalemate detection)
30✔
763
        firstCompleted := false   // tracks if any successful eval completed; controls diff scope for external tool
30✔
764
        hadFindings := false      // tracks if external review found any issues requiring fixes
30✔
765

30✔
766
        for i := 1; i <= maxIterations; i++ {
84✔
767
                select {
54✔
768
                case <-loopCtx.Done():
×
769
                        if r.isBreak(loopCtx, ctx) {
×
770
                                r.log.Print("manual break requested, external review terminated early")
×
771
                                return hadFindings, nil
×
772
                        }
×
773
                        return hadFindings, fmt.Errorf("%s loop: %w", cfg.name, ctx.Err())
×
774
                default:
54✔
775
                }
776

777
                r.log.PrintSection(cfg.makeSection(i))
54✔
778

54✔
779
                // run external review tool. use branch-wide diff until a successful claude eval completes,
54✔
780
                // so that a timeout on the first eval doesn't narrow subsequent reviews to working-tree only
54✔
781
                reviewResult := r.runWithLimitRetry(loopCtx, cfg.runReview, cfg.buildPrompt(!firstCompleted, claudeResponse), cfg.name)
54✔
782
                if reviewResult.Error != nil {
57✔
783
                        if r.isBreak(loopCtx, ctx) {
4✔
784
                                r.log.Print("manual break requested, external review terminated early")
1✔
785
                                return hadFindings, nil
1✔
786
                        }
1✔
787
                        if err := r.handlePatternMatchError(reviewResult.Error, cfg.name); err != nil {
3✔
788
                                return hadFindings, err
1✔
789
                        }
1✔
790
                        return hadFindings, fmt.Errorf("%s execution: %w", cfg.name, reviewResult.Error)
1✔
791
                }
792

793
                if reviewResult.Output == "" {
54✔
794
                        r.log.Print("%s review returned no output, skipping...", cfg.name)
3✔
795
                        break
3✔
796
                }
797

798
                // show findings summary before Claude evaluation
799
                cfg.showSummary(reviewResult.Output)
48✔
800

48✔
801
                // capture state before claude evaluation for stalemate detection (only when enabled)
48✔
802
                var headBefore, diffBefore string
48✔
803
                if r.cfg.ReviewPatience > 0 {
62✔
804
                        headBefore = r.headHash()
14✔
805
                        diffBefore = r.diffFingerprint()
14✔
806
                }
14✔
807

808
                // pass output to claude for evaluation and fixing
809
                r.phaseHolder.Set(status.PhaseClaudeEval)
48✔
810
                r.log.PrintSection(status.NewClaudeEvalSection())
48✔
811
                claudeResult := r.runWithLimitRetry(loopCtx, r.reviewClaude.Run, cfg.buildEvalPrompt(reviewResult.Output), "claude")
48✔
812

48✔
813
                // restore codex phase for next iteration
48✔
814
                r.phaseHolder.Set(status.PhaseCodex)
48✔
815
                if claudeResult.Error != nil {
48✔
816
                        if r.isBreak(loopCtx, ctx) {
×
817
                                r.log.Print("manual break requested, external review terminated early")
×
818
                                return hadFindings, nil
×
819
                        }
×
820
                        if err := r.handlePatternMatchError(claudeResult.Error, "claude"); err != nil {
×
821
                                return hadFindings, err
×
822
                        }
×
823
                        return hadFindings, fmt.Errorf("claude execution: %w", claudeResult.Error)
×
824
                }
825

826
                // on session timeout, skip response capture and stalemate detection; the session was killed
827
                // before it could finish, so partial output can't be trusted as previous context and
828
                // "no changes" doesn't mean "nothing to fix"
829
                if r.lastSessionTimedOut {
51✔
830
                        r.log.Print("claude eval session timed out, retrying %s iteration...", cfg.name)
3✔
831
                        continue
3✔
832
                }
833

834
                firstCompleted = true // successful eval completed, next iteration can use working-tree diff
45✔
835
                claudeResponse = claudeResult.Output
45✔
836

45✔
837
                // exit only when claude sees "no findings"
45✔
838
                if isCodexDone(claudeResult.Signal) {
64✔
839
                        r.log.Print("%s review complete - no more findings", cfg.name)
19✔
840
                        return hadFindings, nil
19✔
841
                }
19✔
842

843
                // findings were reported and need fixing — mark for post-codex review
844
                hadFindings = true
26✔
845

26✔
846
                // stalemate detection: track consecutive rounds with no changes (commits or working tree edits).
26✔
847
                // the eval prompt tells claude not to commit during fix rounds, so HEAD alone can't distinguish
26✔
848
                // "rejected findings" from "made fixes without commit". checking the diff fingerprint catches
26✔
849
                // working tree edits, making the detection accurate for both cases.
26✔
850
                var stalemate bool
26✔
851
                unchangedRounds, stalemate = r.updateStalemate(headBefore, diffBefore, unchangedRounds)
26✔
852
                if stalemate {
27✔
853
                        return hadFindings, nil
1✔
854
                }
1✔
855

856
                if err := r.sleepWithContext(loopCtx, r.iterationDelay); err != nil {
25✔
857
                        if r.isBreak(loopCtx, ctx) {
×
858
                                r.log.Print("manual break requested, external review terminated early")
×
859
                                return hadFindings, nil
×
860
                        }
×
861
                        return hadFindings, fmt.Errorf("interrupted: %w", err)
×
862
                }
863
        }
864

865
        r.log.Print("max %s iterations reached, continuing to next phase...", cfg.name)
7✔
866
        return hadFindings, nil
7✔
867
}
868

869
// breakContext derives a child context that cancels when one value is drained from the break channel.
870
// if no break channel is configured, returns the parent context and a no-op cancel.
871
// callers detect break by checking loopCtx.Err() != nil && parentCtx.Err() == nil.
872
func (r *Runner) breakContext(parent context.Context) (context.Context, context.CancelFunc) {
59✔
873
        if r.breakCh == nil {
111✔
874
                return parent, func() {}
104✔
875
        }
876
        ctx, cancel := context.WithCancel(parent)
7✔
877
        go func() {
14✔
878
                select {
7✔
879
                case <-r.breakCh:
5✔
880
                        cancel()
5✔
881
                case <-ctx.Done():
2✔
882
                }
883
        }()
884
        return ctx, cancel
7✔
885
}
886

887
// isBreak returns true if the loop context was canceled by a break signal
888
// while the parent context is still alive. does not read from the break channel,
889
// so it can be called without consuming a pending signal.
890
func (r *Runner) isBreak(loopCtx, parentCtx context.Context) bool {
32✔
891
        return loopCtx.Err() != nil && parentCtx.Err() == nil
32✔
892
}
32✔
893

894
// drainBreakCh does a non-blocking drain of one pending value from the break channel.
895
// called after pause+resume to prevent a SIGQUIT received during the pause prompt
896
// from immediately canceling the next iteration. not called on normal iteration
897
// boundaries so that a legitimate Ctrl+\ between iterations is preserved.
898
func (r *Runner) drainBreakCh() {
9✔
899
        if r.breakCh == nil {
10✔
900
                return
1✔
901
        }
1✔
902
        select {
8✔
903
        case <-r.breakCh:
2✔
904
        default:
6✔
905
        }
906
}
907

908
// buildCodexPrompt creates the prompt for codex review.
909
// uses the codex_review prompt loaded from config with all variables expanded,
910
// including {{PREVIOUS_REVIEW_CONTEXT}} for iteration context.
911
func (r *Runner) buildCodexPrompt(isFirst bool, claudeResponse string) string {
60✔
912
        return r.replaceVariablesWithIteration(r.cfg.AppConfig.CodexReviewPrompt, isFirst, claudeResponse)
60✔
913
}
60✔
914

915
// validatePlanHasTasks returns an error if the plan file has no executable task sections.
916
// guards against spec/reference docs that lack ### Task N: / ### Iteration N: headers,
917
// which would otherwise cause the task loop to retry TASK_FAILED until exhaustion.
918
// callers must ensure r.cfg.PlanFile is non-empty before invoking.
919
func (r *Runner) validatePlanHasTasks() error {
36✔
920
        path := r.resolvePlanFilePath()
36✔
921
        p, err := plan.ParsePlanFile(path)
36✔
922
        if err != nil {
37✔
923
                return fmt.Errorf("parse plan for validation: %w", err)
1✔
924
        }
1✔
925
        if len(p.Tasks) == 0 {
41✔
926
                return fmt.Errorf("plan file %q has no executable task sections (### Task N: or ### Iteration N:); add task sections or pass a different plan file", path)
6✔
927
        }
6✔
928
        return nil
29✔
929
}
930

931
// hasUncompletedTasks checks if any Task section has uncompleted checkboxes.
932
// only Task sections (### Task N: or ### Iteration N:) are considered.
933
// checkboxes in Success criteria, Overview, or Context are ignored for this check,
934
// so the agent can output ALL_TASKS_DONE when those are verification-only.
935
// for malformed plans (checkboxes without task headers), returns true if any [ ] exists.
936
func (r *Runner) hasUncompletedTasks() bool {
22✔
937
        path := r.resolvePlanFilePath()
22✔
938
        if path == "" {
22✔
939
                return false // no plan file, nothing to complete
×
940
        }
×
941
        p, err := plan.ParsePlanFile(path)
22✔
942
        if err != nil {
22✔
943
                r.log.Print("[WARN] failed to parse plan file for completion check: %v", err)
×
944
                return true // assume incomplete if can't read
×
945
        }
×
946
        for _, t := range p.Tasks {
47✔
947
                if t.HasUncompletedActionableWork() {
29✔
948
                        return true
4✔
949
                }
4✔
950
        }
951
        // malformed plans: no task headers but file has [ ] — treat as incomplete
952
        if len(p.Tasks) == 0 {
20✔
953
                has, err := plan.FileHasUncompletedCheckbox(path)
2✔
954
                if err != nil {
2✔
955
                        return true
×
956
                }
×
957
                if has {
3✔
958
                        return true
1✔
959
                }
1✔
960
        }
961
        return false
17✔
962
}
963

964
// nextPlanTaskPosition returns the 1-indexed position of the first uncompleted task in the plan.
965
// returns 0 if the plan file can't be read/parsed or no uncompleted tasks exist (caller falls back to loop counter).
966
func (r *Runner) nextPlanTaskPosition() int {
39✔
967
        p, err := plan.ParsePlanFile(r.resolvePlanFilePath())
39✔
968
        if err != nil {
41✔
969
                r.log.Print("[WARN] failed to parse plan file for task position: %v", err)
2✔
970
                return 0
2✔
971
        }
2✔
972
        for i, t := range p.Tasks {
83✔
973
                if t.HasUncompletedActionableWork() {
67✔
974
                        return i + 1 // 1-indexed
21✔
975
                }
21✔
976
        }
977
        return 0
16✔
978
}
979

980
// showCodexSummary displays a condensed summary of codex output before Claude evaluation.
981
// extracts text until first code block or maxCodexSummaryLen chars, whichever is shorter.
982
func (r *Runner) showCodexSummary(output string) {
46✔
983
        r.showExternalReviewSummary("codex", output)
46✔
984
}
46✔
985

986
// showExternalReviewSummary displays a condensed summary of external review output.
987
// extracts text until first code block or 5000 chars, whichever is shorter.
988
func (r *Runner) showExternalReviewSummary(toolName, output string) {
46✔
989
        summary := output
46✔
990

46✔
991
        // trim to first code block if present
46✔
992
        if idx := strings.Index(summary, "```"); idx > 0 {
46✔
993
                summary = summary[:idx]
×
994
        }
×
995

996
        // limit to maxCodexSummaryLen runes to avoid splitting multi-byte characters
997
        if runes := []rune(summary); len(runes) > maxCodexSummaryLen {
46✔
998
                summary = string(runes[:maxCodexSummaryLen]) + "..."
×
999
        }
×
1000

1001
        summary = strings.TrimSpace(summary)
46✔
1002
        if summary == "" {
46✔
1003
                return
×
1004
        }
×
1005

1006
        r.log.Print("%s findings:", toolName)
46✔
1007
        for line := range strings.SplitSeq(summary, "\n") {
92✔
1008
                if strings.TrimSpace(line) == "" {
46✔
1009
                        continue
×
1010
                }
1011
                r.log.PrintAligned("  " + line)
46✔
1012
        }
1013
}
1014

1015
// sentinel errors for user-initiated exits.
var (
	// ErrUserAborted is returned when the user aborts or declines to resume after a break
	// signal (Ctrl+\). it travels as a regular non-nil error so that callers (including mode
	// entrypoints) can recognize it, treat it as a clean user-initiated exit, and skip any
	// further review/finalize steps.
	ErrUserAborted = errors.New("user aborted")

	// ErrUserRejectedPlan is returned when user rejects the plan draft.
	ErrUserRejectedPlan = errors.New("user rejected plan")
)
1022

1023
// draftReviewResult is the outcome of handlePlanDraft for a single claude output.
type draftReviewResult struct {
	// handled is true when a plan draft was found in the output and processed
	handled bool
	// feedback carries the user's revision request; non-empty only for the "revise" action
	feedback string
	// err is set when collecting the review failed or the user rejected the draft
	err error
}
1029

1030
// handlePlanDraft processes PLAN_DRAFT signal if present in output.
1031
// returns result indicating whether draft was handled and any feedback/errors.
1032
func (r *Runner) handlePlanDraft(ctx context.Context, output string) draftReviewResult {
16✔
1033
        planContent, draftErr := parsePlanDraftPayload(output)
16✔
1034
        if draftErr != nil {
25✔
1035
                // log malformed signals (but not "no signal" which is expected)
9✔
1036
                if !errors.Is(draftErr, errNoPlanDraftSignal) {
10✔
1037
                        r.log.Print("warning: %v", draftErr)
1✔
1038
                }
1✔
1039
                return draftReviewResult{handled: false}
9✔
1040
        }
1041

1042
        r.log.Print("plan draft ready for review")
7✔
1043

7✔
1044
        action, feedback, askErr := r.inputCollector.AskDraftReview(ctx, "Review the plan draft", planContent)
7✔
1045
        if askErr != nil {
8✔
1046
                return draftReviewResult{handled: true, err: fmt.Errorf("collect draft review: %w", askErr)}
1✔
1047
        }
1✔
1048

1049
        // log the draft review action and feedback to progress file
1050
        r.log.LogDraftReview(action, feedback)
6✔
1051

6✔
1052
        switch action {
6✔
1053
        case "accept":
3✔
1054
                r.log.Print("draft accepted, continuing to write plan file...")
3✔
1055
                return draftReviewResult{handled: true}
3✔
1056
        case "revise":
2✔
1057
                r.log.Print("revision requested, re-running with feedback...")
2✔
1058
                return draftReviewResult{handled: true, feedback: feedback}
2✔
1059
        case "reject":
1✔
1060
                r.log.Print("plan rejected by user")
1✔
1061
                return draftReviewResult{handled: true, err: ErrUserRejectedPlan}
1✔
1062
        }
1063

1064
        return draftReviewResult{handled: true}
×
1065
}
1066

1067
// handlePlanQuestion processes QUESTION signal if present in output.
1068
// returns true if question was found and handled, false otherwise.
1069
// returns error if question handling failed.
1070
func (r *Runner) handlePlanQuestion(ctx context.Context, output string) (bool, error) {
9✔
1071
        question, err := parseQuestionPayload(output)
9✔
1072
        if err != nil {
15✔
1073
                // log malformed signals (but not "no signal" which is expected)
6✔
1074
                if !errors.Is(err, errNoQuestionSignal) {
6✔
1075
                        r.log.Print("warning: %v", err)
×
1076
                }
×
1077
                return false, nil
6✔
1078
        }
1079

1080
        r.log.LogQuestion(question.Question, question.Options)
3✔
1081

3✔
1082
        answer, askErr := r.inputCollector.AskQuestion(ctx, question.Question, question.Options)
3✔
1083
        if askErr != nil {
4✔
1084
                return true, fmt.Errorf("collect answer: %w", askErr)
1✔
1085
        }
1✔
1086

1087
        r.log.LogAnswer(answer)
2✔
1088
        return true, nil
2✔
1089
}
1090

1091
// runPlanCreation executes the interactive plan creation loop.
1092
// the loop continues until PLAN_READY signal or max iterations reached.
1093
// handles QUESTION signals for Q&A and PLAN_DRAFT signals for draft review.
1094
func (r *Runner) runPlanCreation(ctx context.Context) error {
18✔
1095
        if r.cfg.PlanDescription == "" {
19✔
1096
                return errors.New("plan description required for plan mode")
1✔
1097
        }
1✔
1098
        if r.inputCollector == nil {
18✔
1099
                return errors.New("input collector required for plan mode")
1✔
1100
        }
1✔
1101

1102
        r.phaseHolder.Set(status.PhasePlan)
16✔
1103
        r.log.PrintRaw("starting interactive plan creation\n")
16✔
1104
        r.log.Print("plan request: %s", r.cfg.PlanDescription)
16✔
1105

16✔
1106
        // plan iterations use 20% of max_iterations
16✔
1107
        maxPlanIterations := max(minPlanIterations, r.cfg.MaxIterations/planIterationDivisor)
16✔
1108

16✔
1109
        // track revision feedback for context in next iteration
16✔
1110
        var lastRevisionFeedback string
16✔
1111

16✔
1112
        for i := 1; i <= maxPlanIterations; i++ {
46✔
1113
                select {
30✔
1114
                case <-ctx.Done():
1✔
1115
                        return fmt.Errorf("plan creation: %w", ctx.Err())
1✔
1116
                default:
29✔
1117
                }
1118

1119
                r.log.PrintSection(status.NewPlanIterationSection(i))
29✔
1120

29✔
1121
                prompt := r.buildPlanPrompt()
29✔
1122
                // append revision feedback context if present
29✔
1123
                hadFeedback := lastRevisionFeedback != ""
29✔
1124
                if hadFeedback {
32✔
1125
                        prompt = fmt.Sprintf("%s\n\n---\nPREVIOUS DRAFT FEEDBACK:\nUser requested revisions with this feedback:\n%s\n\nPlease revise the plan accordingly and present a new PLAN_DRAFT.", prompt, lastRevisionFeedback)
3✔
1126
                }
3✔
1127

1128
                result := r.runWithLimitRetry(ctx, r.claude.Run, prompt, "claude")
29✔
1129
                if result.Error != nil {
31✔
1130
                        if err := r.handlePatternMatchError(result.Error, "claude"); err != nil {
3✔
1131
                                return err
1✔
1132
                        }
1✔
1133
                        return fmt.Errorf("claude execution: %w", result.Error)
1✔
1134
                }
1135

1136
                if result.Signal == SignalFailed {
28✔
1137
                        return errors.New("plan creation failed (FAILED signal received)")
1✔
1138
                }
1✔
1139

1140
                // check for PLAN_READY signal
1141
                if isPlanReady(result.Signal) {
34✔
1142
                        r.log.Print("plan creation completed")
8✔
1143
                        return nil
8✔
1144
                }
8✔
1145

1146
                // on session timeout, skip output parsing and retry; the session was killed before
1147
                // it could finish, so partial output may contain truncated PLAN_DRAFT or QUESTION markers.
1148
                // preserve lastRevisionFeedback so the next attempt re-sends the user's revision request
1149
                if r.lastSessionTimedOut {
20✔
1150
                        r.log.Print("plan creation session timed out, retrying iteration...")
2✔
1151
                        if err := r.sleepWithContext(ctx, r.iterationDelay); err != nil {
2✔
1152
                                return fmt.Errorf("interrupted: %w", err)
×
1153
                        }
×
1154
                        continue
2✔
1155
                }
1156

1157
                // session completed successfully, clear revision feedback since it was consumed
1158
                if hadFeedback {
17✔
1159
                        lastRevisionFeedback = ""
1✔
1160
                }
1✔
1161

1162
                // check for PLAN_DRAFT signal - present draft for user review
1163
                draftResult := r.handlePlanDraft(ctx, result.Output)
16✔
1164
                if draftResult.err != nil {
18✔
1165
                        return draftResult.err
2✔
1166
                }
2✔
1167
                if draftResult.handled {
19✔
1168
                        lastRevisionFeedback = draftResult.feedback
5✔
1169
                        if err := r.sleepWithContext(ctx, r.iterationDelay); err != nil {
5✔
1170
                                return fmt.Errorf("interrupted: %w", err)
×
1171
                        }
×
1172
                        continue
5✔
1173
                }
1174

1175
                // check for QUESTION signal
1176
                handled, err := r.handlePlanQuestion(ctx, result.Output)
9✔
1177
                if err != nil {
10✔
1178
                        return err
1✔
1179
                }
1✔
1180
                if handled {
10✔
1181
                        if err := r.sleepWithContext(ctx, r.iterationDelay); err != nil {
2✔
1182
                                return fmt.Errorf("interrupted: %w", err)
×
1183
                        }
×
1184
                        continue
2✔
1185
                }
1186

1187
                // no question, no draft, and no completion - continue
1188
                if err := r.sleepWithContext(ctx, r.iterationDelay); err != nil {
6✔
1189
                        return fmt.Errorf("interrupted: %w", err)
×
1190
                }
×
1191
        }
1192

1193
        return fmt.Errorf("max plan iterations (%d) reached without completion", maxPlanIterations)
1✔
1194
}
1195

1196
// handlePatternMatchError checks if err is a PatternMatchError or LimitPatternError and logs appropriate messages.
1197
// Returns the error if it's a pattern match (to trigger graceful exit), nil otherwise.
1198
func (r *Runner) handlePatternMatchError(err error, tool string) error {
10✔
1199
        var patternErr *executor.PatternMatchError
10✔
1200
        if errors.As(err, &patternErr) {
14✔
1201
                r.log.Print("error: detected %q in %s output", patternErr.Pattern, tool)
4✔
1202
                r.log.Print("run '%s' for more information", patternErr.HelpCmd)
4✔
1203
                return err
4✔
1204
        }
4✔
1205
        var limitErr *executor.LimitPatternError
6✔
1206
        if errors.As(err, &limitErr) {
8✔
1207
                r.log.Print("error: detected %q in %s output", limitErr.Pattern, tool)
2✔
1208
                r.log.Print("run '%s' for more information", limitErr.HelpCmd)
2✔
1209
                return err
2✔
1210
        }
2✔
1211
        return nil
4✔
1212
}
1213

1214
// runWithLimitRetry wraps an executor Run() call with rate limit retry logic and optional session timeout.
1215
// if the result contains a LimitPatternError and waitOnLimit > 0, it logs a message, waits, and retries.
1216
// if waitOnLimit == 0, the LimitPatternError is returned as-is (existing exit behavior).
1217
// other errors (including PatternMatchError) are returned without retry.
1218
// when SessionTimeout > 0, each run() call gets a child context with deadline.
1219
// on session timeout (child timed out but parent alive), logs a warning and returns result with error cleared.
1220
// retries indefinitely until success or context cancellation.
1221
func (r *Runner) runWithLimitRetry(ctx context.Context, run func(context.Context, string) executor.Result,
1222
        prompt, toolName string) executor.Result {
247✔
1223
        for {
499✔
1224
                result := r.runWithSessionTimeout(ctx, run, prompt, toolName)
252✔
1225
                if result.Error == nil {
480✔
1226
                        return result
228✔
1227
                }
228✔
1228

1229
                var limitErr *executor.LimitPatternError
24✔
1230
                if !errors.As(result.Error, &limitErr) {
39✔
1231
                        return result // not a limit error, return as-is
15✔
1232
                }
15✔
1233

1234
                if r.waitOnLimit <= 0 {
12✔
1235
                        return result // no wait configured, return limit error as-is
3✔
1236
                }
3✔
1237

1238
                r.log.Print("rate limit detected: %q in %s output, waiting %s before retry...",
6✔
1239
                        limitErr.Pattern, toolName, r.waitOnLimit)
6✔
1240

6✔
1241
                if err := r.sleepWithContext(ctx, r.waitOnLimit); err != nil {
7✔
1242
                        return executor.Result{Error: fmt.Errorf("interrupted during limit wait: %w", ctx.Err())}
1✔
1243
                }
1✔
1244
        }
1245
}
1246

1247
// runWithSessionTimeout runs the executor with an optional session timeout.
1248
// if SessionTimeout > 0 and toolName is "claude", wraps ctx with context.WithTimeout before calling run.
1249
// on session timeout (child timed out but parent alive), logs a warning and clears the error
1250
// so callers treat it as a non-completing iteration that continues naturally.
1251
// only applies to claude sessions; codex and custom executors are not affected.
1252
func (r *Runner) runWithSessionTimeout(ctx context.Context, run func(context.Context, string) executor.Result,
1253
        prompt, toolName string) executor.Result {
257✔
1254
        r.lastSessionTimedOut = false
257✔
1255
        sessionTimeout := r.sessionTimeout()
257✔
1256
        if sessionTimeout <= 0 || toolName != "claude" {
493✔
1257
                result := run(ctx, prompt) // no timeout configured or non-claude tool
236✔
1258
                // idle timeout without signal looks like "nothing to fix" to review loops;
236✔
1259
                // treat it like session timeout so they retry instead of exiting.
236✔
1260
                if result.IdleTimedOut && result.Signal == "" {
237✔
1261
                        r.log.Print("warning: %s session idle timed out, no output activity detected", toolName)
1✔
1262
                        r.lastSessionTimedOut = true
1✔
1263
                }
1✔
1264
                return result
236✔
1265
        }
1266

1267
        childCtx, cancel := context.WithTimeout(ctx, sessionTimeout)
21✔
1268
        defer cancel()
21✔
1269

21✔
1270
        result := run(childCtx, prompt)
21✔
1271

21✔
1272
        // check if this was a session timeout: child context expired but parent is still alive.
21✔
1273
        // clear the error so callers (task loop, review loop) treat it as a non-completing iteration
21✔
1274
        // rather than aborting the phase. set lastSessionTimedOut so review loops can distinguish
21✔
1275
        // timeout from "genuinely found nothing" and continue instead of exiting.
21✔
1276
        if childCtx.Err() != nil && ctx.Err() == nil {
32✔
1277
                r.log.Print("warning: %s session timed out after %s, the agent may have started a blocking operation",
11✔
1278
                        toolName, sessionTimeout)
11✔
1279
                result.Error = nil
11✔
1280
                result.Signal = "" // clear any signal emitted before timeout; can't trust partial session
11✔
1281
                r.lastSessionTimedOut = true
11✔
1282
        } else if result.IdleTimedOut && result.Signal == "" {
21✔
1283
                // idle timeout without signal: same treatment as session timeout for review loops
×
1284
                r.log.Print("warning: %s session idle timed out, no output activity detected", toolName)
×
1285
                r.lastSessionTimedOut = true
×
1286
        }
×
1287

1288
        return result
21✔
1289
}
1290

1291
// sessionTimeout returns the configured session timeout duration.
1292
// returns 0 if not configured or AppConfig is nil.
1293
func (r *Runner) sessionTimeout() time.Duration {
257✔
1294
        if r.cfg.AppConfig == nil {
257✔
1295
                return 0
×
1296
        }
×
1297
        return r.cfg.AppConfig.SessionTimeout
257✔
1298
}
1299

1300
// runFinalize executes the optional finalize step after successful reviews.
1301
// runs once, best-effort: failures are logged but don't block success.
1302
// exception: context cancellation is propagated (user wants to abort).
1303
func (r *Runner) runFinalize(ctx context.Context) error {
49✔
1304
        if !r.cfg.FinalizeEnabled {
88✔
1305
                return nil
39✔
1306
        }
39✔
1307

1308
        r.phaseHolder.Set(status.PhaseFinalize)
10✔
1309
        r.log.PrintSection(status.NewGenericSection("finalize step"))
10✔
1310

10✔
1311
        prompt := r.replacePromptVariables(r.cfg.AppConfig.FinalizePrompt)
10✔
1312
        result := r.runWithLimitRetry(ctx, r.reviewClaude.Run, prompt, "claude")
10✔
1313

10✔
1314
        if result.Error != nil {
13✔
1315
                // propagate context cancellation - user wants to abort
3✔
1316
                if errors.Is(result.Error, context.Canceled) || errors.Is(result.Error, context.DeadlineExceeded) {
4✔
1317
                        return fmt.Errorf("finalize step: %w", result.Error)
1✔
1318
                }
1✔
1319
                // pattern match (rate limit or error) - log via shared helper, but don't fail (best-effort)
1320
                if r.handlePatternMatchError(result.Error, "claude") != nil {
3✔
1321
                        return nil //nolint:nilerr // intentional: best-effort semantics, log but don't propagate
1✔
1322
                }
1✔
1323
                // best-effort: log error but don't fail
1324
                r.log.Print("finalize step failed: %v", result.Error)
1✔
1325
                return nil
1✔
1326
        }
1327

1328
        if result.Signal == SignalFailed {
8✔
1329
                r.log.Print("finalize step reported failure (non-blocking)")
1✔
1330
                return nil
1✔
1331
        }
1✔
1332

1333
        r.log.Print("finalize step completed")
6✔
1334
        return nil
6✔
1335
}
1336

1337
// sleepWithContext pauses for the given duration but returns immediately if context is canceled.
1338
// returns ctx.Err() on cancellation, nil on normal completion.
1339
func (r *Runner) sleepWithContext(ctx context.Context, d time.Duration) error {
60✔
1340
        t := time.NewTimer(d)
60✔
1341
        defer t.Stop()
60✔
1342
        select {
60✔
1343
        case <-t.C:
58✔
1344
                return nil
58✔
1345
        case <-ctx.Done():
2✔
1346
                return fmt.Errorf("sleep interrupted: %w", ctx.Err())
2✔
1347
        }
1348
}
1349

1350
// needsCodexBinary returns true if the current configuration requires the codex binary.
1351
// returns false when external_review_tool is "custom" or "none", since codex isn't used.
1352
func needsCodexBinary(appConfig *config.Config) bool {
3✔
1353
        if appConfig == nil {
3✔
1354
                return true // default behavior assumes codex
×
1355
        }
×
1356
        switch appConfig.ExternalReviewTool {
3✔
1357
        case "custom", "none":
2✔
1358
                return false
2✔
1359
        default:
1✔
1360
                return true // "codex" or empty (default) requires codex binary
1✔
1361
        }
1362
}
1363

1364
// ParseModelEffort breaks a "model[:effort]" spec into its two components.
// Used by New to parse task_model/review_model config values into the
// ClaudeExecutor.Model and ClaudeExecutor.Effort fields.
// The split happens at the first colon only; everything after it, further colons
// included, is returned as effort. Without a colon the whole input is the model.
// Examples: "" -> ("", ""), "opus" -> ("opus", ""), ":high" -> ("", "high"),
// "opus:" -> ("opus", ""), "a:b:c" -> ("a", "b:c").
func ParseModelEffort(s string) (model, effort string) {
	sep := strings.IndexByte(s, ':')
	if sep < 0 {
		return s, ""
	}
	return s[:sep], s[sep+1:]
}
27✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc