• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

umputun / ralphex / 26265094477

22 May 2026 02:34AM UTC coverage: 83.341% (+0.4%) from 82.902%
26265094477

Pull #350

github

umputun
feat(codex): inject task-phase directive to suppress conflicting skill workflows

Codex auto-activates skills from the user's ~/.codex/skills/ based on prompt
content. A user's plan-execution skill triggers on the same wording ralphex's
task prompt uses, runs a competing workflow, and floods the progress stream
with recited skill text. prependCodexTaskGuidance prepends a generic directive
telling codex that ralphex's task prompt is authoritative when an
auto-activated skill's workflow conflicts with it. Active only under the codex
executor; wired in runTaskPhase. Soft mitigation — codex 0.133.0 exposes no
per-invocation skill-disable flag.
Pull Request #350: Add first-class codex executor mode

649 of 712 new or added lines in 9 files covered. (91.15%)

14 existing lines in 4 files now uncovered.

7439 of 8926 relevant lines covered (83.34%)

232.26 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

90.24
/pkg/processor/runner.go
1
// Package processor provides the main orchestration loop for ralphex execution.
2
package processor
3

4
import (
5
        "context"
6
        "errors"
7
        "fmt"
8
        "io/fs"
9
        "os"
10
        "os/exec"
11
        "path/filepath"
12
        "strings"
13
        "sync"
14
        "time"
15

16
        "github.com/umputun/ralphex/pkg/config"
17
        "github.com/umputun/ralphex/pkg/executor"
18
        "github.com/umputun/ralphex/pkg/plan"
19
        "github.com/umputun/ralphex/pkg/status"
20
)
21

22
// DefaultIterationDelay is the pause inserted between iterations so the system
// can settle. it applies whenever Config.IterationDelayMs is not a positive value.
const DefaultIterationDelay = 2 * time.Second
24

25
// iteration limits are derived from max_iterations by dividing by the matching
// divisor; the min* values are the documented lower bounds for each phase.
const (
	// minimum claude review iterations
	minReviewIterations = 3
	// review iterations = max_iterations / divisor
	reviewIterationDivisor = 10
	// minimum codex review iterations
	minCodexIterations = 3
	// codex iterations = max_iterations / divisor
	codexIterationDivisor = 5
	// minimum plan creation iterations
	minPlanIterations = 5
	// plan iterations = max_iterations / divisor
	planIterationDivisor = 5
	// max chars for codex output summary
	maxCodexSummaryLen = 5000
)
34

35
// Mode selects which pipeline Runner.Run executes.
type Mode string

// supported execution modes.
const (
	// ModeFull runs tasks, reviews and the codex phase.
	ModeFull Mode = "full"
	// ModeReview skips tasks and runs the full review pipeline.
	ModeReview Mode = "review"
	// ModeCodexOnly skips tasks and the first review, running only the codex loop.
	ModeCodexOnly Mode = "codex-only"
	// ModeTasksOnly runs only the task phase and skips all reviews.
	ModeTasksOnly Mode = "tasks-only"
	// ModePlan is the interactive plan creation mode.
	ModePlan Mode = "plan"
)
45

46
// Config holds runner configuration.
47
type Config struct {
48
        PlanFile              string         // path to plan file (required for full mode)
49
        PlanDescription       string         // plan description for interactive plan creation mode
50
        ProgressPath          string         // path to progress file
51
        Mode                  Mode           // execution mode
52
        MaxIterations         int            // maximum iterations for task phase
53
        MaxExternalIterations int            // override external review iteration limit (0 = auto)
54
        ReviewPatience        int            // terminate external review after N unchanged rounds (0 = disabled)
55
        Debug                 bool           // enable debug output
56
        NoColor               bool           // disable color output
57
        IterationDelayMs      int            // delay between iterations in milliseconds
58
        TaskRetryCount        int            // number of times to retry failed tasks
59
        TaskModel             string         // model[:effort] spec for task execution; parsed via ParseModelEffort (empty = CLI defaults)
60
        ReviewModel           string         // model[:effort] spec for review phases; empty falls back to TaskModel
61
        CodexEnabled          bool           // whether codex review is enabled
62
        ExternalReviewToolSet bool           // when true, AppConfig.ExternalReviewTool is an explicit choice that overrides codex_enabled=false back-compat
63
        FinalizeEnabled       bool           // whether finalize step is enabled
64
        DefaultBranch         string         // default branch name (detected from repo)
65
        AppConfig             *config.Config // full application config (for executors and prompts)
66
}
67

68
// isCodexExecutor reports whether the configured task/review executor is codex
69
// (the --codex first-class mode). returns false when AppConfig is nil or the
70
// executor is anything else (claude is the default).
71
func (c Config) isCodexExecutor() bool {
1,279✔
72
        return c.AppConfig != nil && c.AppConfig.Executor == config.ExecutorCodex
1,279✔
73
}
1,279✔
74

75
//go:generate moq -out mocks/executor.go -pkg mocks -skip-ensure -fmt goimports . Executor
76
//go:generate moq -out mocks/logger.go -pkg mocks -skip-ensure -fmt goimports . Logger
77
//go:generate moq -out mocks/input_collector.go -pkg mocks -skip-ensure -fmt goimports . InputCollector
78
//go:generate moq -out mocks/git_checker.go -pkg mocks -skip-ensure -fmt goimports . GitChecker
79

80
// Executor runs CLI commands and returns results.
81
type Executor interface {
82
        Run(ctx context.Context, prompt string) executor.Result
83
}
84

85
// Logger provides logging functionality.
86
type Logger interface {
87
        Print(format string, args ...any)
88
        PrintRaw(format string, args ...any)
89
        PrintSection(section status.Section)
90
        PrintAligned(text string)
91
        LogQuestion(question string, options []string)
92
        LogAnswer(answer string)
93
        LogDraftReview(action string, feedback string)
94
        Path() string
95
}
96

97
// InputCollector gathers interactive input during plan creation.
type InputCollector interface {
	// AskQuestion presents question with the given options and returns the answer.
	AskQuestion(ctx context.Context, question string, options []string) (string, error)
	// AskDraftReview presents planContent for review and returns the chosen action
	// plus any feedback text.
	AskDraftReview(ctx context.Context, question string, planContent string) (action string, feedback string, err error)
}
102

103
// GitChecker exposes the git state the review loop inspects.
type GitChecker interface {
	// HeadHash returns a hash string for HEAD, or an error.
	HeadHash() (string, error)
	// DiffFingerprint returns a fingerprint string for the current diff, or an error.
	DiffFingerprint() (string, error)
}
108

109
// Executors groups the executor dependencies for the Runner.
110
// Role-named: Task is used for the task phase, Review for review phases (nil = use Task),
111
// External for the external review phase (nil = no external review), Custom is the
112
// custom external review script executor.
113
type Executors struct {
114
        Task     Executor
115
        Review   Executor // optional: separate executor for review phases (nil = use Task)
116
        External Executor // external review executor (codex or wrapper); nil when Executor=codex or external review disabled
117
        Custom   *executor.CustomExecutor
118
}
119

120
// Runner orchestrates the execution loop.
121
type Runner struct {
122
        cfg                    Config
123
        log                    Logger
124
        task                   Executor // executor for task phase
125
        review                 Executor // executor for review phases (may differ in model)
126
        external               Executor // executor for the external review phase (codex by default)
127
        custom                 *executor.CustomExecutor
128
        git                    GitChecker
129
        inputCollector         InputCollector
130
        phaseHolder            *status.PhaseHolder
131
        iterationDelay         time.Duration
132
        taskRetryCount         int
133
        waitOnLimit            time.Duration
134
        breakCh                <-chan struct{}                 // nil = feature disabled; receives one value per break signal
135
        pauseHandler           func(ctx context.Context) bool  // called on break during task phase; true = resume, false = abort
136
        lastSessionTimedOut    bool                            // set by runWithSessionTimeout, checked by review loops
137
        taskPhaseOverride      func(ctx context.Context) error // test seam: override runTaskPhase result (nil = normal execution)
138
        codexFrontmatterWarned map[string]bool                 // tracks per-agent codex frontmatter-discard warnings (one log per agent name)
139
}
140

141
// New creates a new Runner with the given configuration and shared phase holder.
142
// If codex is enabled but the binary is not found in PATH, it is automatically disabled with a warning.
143
func New(cfg Config, log Logger, holder *status.PhaseHolder) *Runner {
20✔
144
        customExec := cfg.buildCustomExecutor(log)
20✔
145

20✔
146
        if cfg.isCodexExecutor() {
28✔
147
                if cfg.AppConfig.PassClaudeMd {
11✔
148
                        maybeEmitClaudeMdSetupHint(log)
3✔
149
                }
3✔
150
                // one shared codex executor with multi_agent always enabled — task, review, and
151
                // finalize all run the same codex configuration so any prompt can use
152
                // {{agent:...}} expansions if the user customizes it.
153
                codexExec := cfg.buildCodexExecutor(log)
8✔
154
                return NewWithExecutors(cfg, log, Executors{Task: codexExec, Review: codexExec, External: nil, Custom: customExec}, holder)
8✔
155
        }
156

157
        claudeExec, reviewExec := cfg.buildClaudeExecutors(log)
12✔
158
        codexExec := cfg.buildExternalCodexExecutor(log)
12✔
159

12✔
160
        if cfg.CodexEnabled && needsCodexBinary(cfg.AppConfig) {
13✔
161
                codexCmd := codexExec.Command
1✔
162
                if codexCmd == "" {
1✔
NEW
163
                        codexCmd = "codex"
×
NEW
164
                }
×
165
                if _, err := exec.LookPath(codexCmd); err != nil {
2✔
166
                        log.Print("warning: codex not found (%s: %v), disabling codex review phase", codexCmd, err)
1✔
167
                        cfg.CodexEnabled = false
1✔
168
                }
1✔
169
        }
170

171
        return NewWithExecutors(cfg, log, Executors{Task: claudeExec, Review: reviewExec, External: codexExec, Custom: customExec}, holder)
12✔
172
}
173

174
// buildClaudeExecutors constructs the claude executors for task and review phases.
175
// returns a single executor in the Review slot only when review_model differs from
176
// task_model — otherwise the task executor handles both roles.
177
func (cfg Config) buildClaudeExecutors(log Logger) (*executor.ClaudeExecutor, Executor) {
12✔
178
        claudeExec := &executor.ClaudeExecutor{
12✔
179
                OutputHandler: func(text string) {
12✔
180
                        log.PrintAligned(text)
×
181
                },
×
182
                Debug: cfg.Debug,
183
        }
184
        cfg.applyClaudeAppConfig(claudeExec)
12✔
185

12✔
186
        taskModel, taskEffort := ParseModelEffort(cfg.TaskModel)
12✔
187
        claudeExec.Model, claudeExec.Effort = taskModel, taskEffort
12✔
188

12✔
189
        reviewSpec := cfg.ReviewModel
12✔
190
        if reviewSpec == "" {
21✔
191
                reviewSpec = cfg.TaskModel
9✔
192
        }
9✔
193
        reviewModel, reviewEffort := ParseModelEffort(reviewSpec)
12✔
194
        if reviewModel == taskModel && reviewEffort == taskEffort {
22✔
195
                return claudeExec, nil
10✔
196
        }
10✔
197

198
        reviewExec := &executor.ClaudeExecutor{
2✔
199
                OutputHandler: claudeExec.OutputHandler,
2✔
200
                Debug:         cfg.Debug,
2✔
201
                Model:         reviewModel,
2✔
202
                Effort:        reviewEffort,
2✔
203
        }
2✔
204
        cfg.applyClaudeAppConfig(reviewExec)
2✔
205
        return claudeExec, reviewExec
2✔
206
}
207

208
// applyClaudeAppConfig copies AppConfig-sourced fields onto a claude executor.
209
// no-op when AppConfig is nil.
210
func (cfg Config) applyClaudeAppConfig(e *executor.ClaudeExecutor) {
14✔
211
        if cfg.AppConfig == nil {
14✔
NEW
212
                return
×
NEW
213
        }
×
214
        e.Command = cfg.AppConfig.ClaudeCommand
14✔
215
        e.Args = cfg.AppConfig.ClaudeArgs
14✔
216
        e.ArgsSet = cfg.AppConfig.ClaudeArgsSet
14✔
217
        e.ErrorPatterns = cfg.AppConfig.ClaudeErrorPatterns
14✔
218
        e.LimitPatterns = cfg.AppConfig.ClaudeLimitPatterns
14✔
219
        e.IdleTimeout = cfg.AppConfig.IdleTimeout
14✔
220
        e.PreserveAPIKey = cfg.AppConfig.PreserveAnthropicAPIKey
14✔
221
}
222

223
// buildExternalCodexExecutor builds the codex executor used for the external review
224
// phase in claude mode. MultiAgent stays off (the external review prompt does not use
225
// spawn_agent) and PassClaudeMd stays off (rejected for claude mode by applyCodexOverrides).
226
func (cfg Config) buildExternalCodexExecutor(log Logger) *executor.CodexExecutor {
12✔
227
        e := cfg.newBaseCodexExecutor(log)
12✔
228
        if cfg.AppConfig != nil {
24✔
229
                e.Sandbox = cfg.AppConfig.CodexSandbox
12✔
230
        }
12✔
231
        return e
12✔
232
}
233

234
// buildCodexExecutor builds the codex executor used for first-class --codex mode.
235
// MultiAgent is always enabled so any phase (task, review, finalize) can spawn sub-agents,
236
// and PassClaudeMd is sourced from config. IdleTimeout is wired here (and only here)
237
// because the user explicitly opted into --codex; the external-review codex used in
238
// claude mode keeps master semantics with no idle timeout.
239
func (cfg Config) buildCodexExecutor(log Logger) *executor.CodexExecutor {
8✔
240
        e := cfg.newBaseCodexExecutor(log)
8✔
241
        e.MultiAgent = true
8✔
242
        if cfg.AppConfig != nil {
16✔
243
                e.Sandbox = cfg.AppConfig.CodexExecutorSandbox()
8✔
244
                e.PassClaudeMd = cfg.AppConfig.PassClaudeMd
8✔
245
                e.IdleTimeout = cfg.AppConfig.IdleTimeout
8✔
246
        }
8✔
247
        return e
8✔
248
}
249

250
// newBaseCodexExecutor returns a CodexExecutor populated with the fields shared
251
// between the external-review and first-class --codex builders. Callers layer on
252
// Sandbox, MultiAgent, PassClaudeMd, and IdleTimeout as appropriate for their
253
// role — see buildCodexExecutor (first-class) and buildExternalCodexExecutor
254
// (claude mode). IdleTimeout is intentionally NOT set here: applying it to the
255
// external codex review path silently shortened previously-idle-tolerant
256
// review sessions for default-claude users, so it is wired only by
257
// buildCodexExecutor where the user opted into --codex.
258
func (cfg Config) newBaseCodexExecutor(log Logger) *executor.CodexExecutor {
20✔
259
        e := &executor.CodexExecutor{
20✔
260
                OutputHandler: func(text string) { log.PrintAligned(text) },
20✔
261
                Debug:         cfg.Debug,
262
        }
263
        if cfg.AppConfig == nil {
20✔
NEW
264
                return e
×
NEW
265
        }
×
266
        e.Command = cfg.AppConfig.CodexCommand
20✔
267
        e.Model = cfg.AppConfig.CodexModel
20✔
268
        e.ReasoningEffort = cfg.AppConfig.CodexReasoningEffort
20✔
269
        e.TimeoutMs = cfg.AppConfig.CodexTimeoutMs
20✔
270
        e.ErrorPatterns = cfg.AppConfig.CodexErrorPatterns
20✔
271
        e.LimitPatterns = cfg.AppConfig.CodexLimitPatterns
20✔
272
        return e
20✔
273
}
274

275
// buildCustomExecutor returns the optional custom external review executor.
276
// returns nil when no custom_review_script is configured.
277
func (cfg Config) buildCustomExecutor(log Logger) *executor.CustomExecutor {
20✔
278
        if cfg.AppConfig == nil || cfg.AppConfig.CustomReviewScript == "" {
39✔
279
                return nil
19✔
280
        }
19✔
281
        return &executor.CustomExecutor{
1✔
282
                Script: cfg.AppConfig.CustomReviewScript,
1✔
283
                OutputHandler: func(text string) {
1✔
NEW
284
                        log.PrintAligned(text)
×
NEW
285
                },
×
286
                ErrorPatterns: cfg.AppConfig.CodexErrorPatterns,
287
                LimitPatterns: cfg.AppConfig.CodexLimitPatterns,
288
        }
289
}
290

291
// NewWithExecutors creates a new Runner with custom executors (for testing).
292
func NewWithExecutors(cfg Config, log Logger, execs Executors, holder *status.PhaseHolder) *Runner {
186✔
293
        // determine iteration delay from config or default
186✔
294
        iterDelay := DefaultIterationDelay
186✔
295
        if cfg.IterationDelayMs > 0 {
235✔
296
                iterDelay = time.Duration(cfg.IterationDelayMs) * time.Millisecond
49✔
297
        }
49✔
298

299
        // determine task retry count from config
300
        // appConfig.TaskRetryCountSet means user explicitly set it (even to 0 for no retries)
301
        retryCount := 1
186✔
302
        if cfg.AppConfig != nil && cfg.AppConfig.TaskRetryCountSet {
335✔
303
                retryCount = cfg.TaskRetryCount
149✔
304
        } else if cfg.TaskRetryCount > 0 {
187✔
305
                retryCount = cfg.TaskRetryCount
1✔
306
        }
1✔
307

308
        // determine wait-on-limit duration from config
309
        var waitOnLimit time.Duration
186✔
310
        if cfg.AppConfig != nil {
335✔
311
                waitOnLimit = cfg.AppConfig.WaitOnLimit
149✔
312
        }
149✔
313

314
        // if no separate review executor, use the same as task executor
315
        review := execs.Review
186✔
316
        if review == nil {
358✔
317
                review = execs.Task
172✔
318
        }
172✔
319

320
        return &Runner{
186✔
321
                cfg:            cfg,
186✔
322
                log:            log,
186✔
323
                task:           execs.Task,
186✔
324
                review:         review,
186✔
325
                external:       execs.External,
186✔
326
                custom:         execs.Custom,
186✔
327
                phaseHolder:    holder,
186✔
328
                iterationDelay: iterDelay,
186✔
329
                taskRetryCount: retryCount,
186✔
330
                waitOnLimit:    waitOnLimit,
186✔
331
        }
186✔
332
}
333

334
// claudeMdHintOnce guards the user-level CLAUDE.md setup hint so it is attempted
// at most once per process, no matter how many runners or phases get constructed.
var claudeMdHintOnce sync.Once
337

338
// maybeEmitClaudeMdSetupHint prints a one-time hint when ~/.claude/CLAUDE.md exists
339
// but ~/.codex/AGENTS.md does not. ralphex never creates the symlink itself; the
340
// user owns ~/.codex/. probing errors are swallowed so a missing or unreadable
341
// home directory simply suppresses the hint.
342
func maybeEmitClaudeMdSetupHint(log Logger) {
7✔
343
        claudeMdHintOnce.Do(func() {
12✔
344
                home, err := os.UserHomeDir()
5✔
345
                if err != nil || home == "" {
5✔
NEW
346
                        return
×
NEW
347
                }
×
348
                claudeMd := filepath.Join(home, ".claude", "CLAUDE.md")
5✔
349
                codexAgents := filepath.Join(home, ".codex", "AGENTS.md")
5✔
350
                if !fileExists(claudeMd) {
7✔
351
                        return
2✔
352
                }
2✔
353
                if fileExists(codexAgents) {
4✔
354
                        return
1✔
355
                }
1✔
356
                log.Print("hint: ~/.claude/CLAUDE.md exists but ~/.codex/AGENTS.md does not. " +
2✔
357
                        "to get user-level CLAUDE.md content into codex, link it: " +
2✔
358
                        "ln -s ~/.claude/CLAUDE.md ~/.codex/AGENTS.md")
2✔
359
        })
360
}
361

362
// fileExists reports whether os.Stat succeeds for path. any stat error
// (missing file, permission problem, ...) is treated as "does not exist".
func fileExists(path string) bool {
	if _, err := os.Stat(path); err != nil {
		return false
	}
	return true
}
8✔
367

368
// SetInputCollector sets the input collector for plan creation mode.
369
func (r *Runner) SetInputCollector(c InputCollector) {
17✔
370
        r.inputCollector = c
17✔
371
}
17✔
372

373
// SetGitChecker sets the git checker for no-commit detection in review loops.
374
func (r *Runner) SetGitChecker(g GitChecker) {
8✔
375
        r.git = g
8✔
376
}
8✔
377

378
// SetBreakCh sets the break channel for manual termination of review and task loops.
379
// each value sent on the channel triggers one break event (repeatable, not close-based).
380
func (r *Runner) SetBreakCh(ch <-chan struct{}) {
7✔
381
        r.breakCh = ch
7✔
382
}
7✔
383

384
// SetPauseHandler sets the callback invoked when a break signal is received during task iteration.
385
// the handler should prompt the user and return true to resume or false to abort.
386
// if nil, break during task phase returns ErrUserAborted immediately.
387
func (r *Runner) SetPauseHandler(fn func(ctx context.Context) bool) {
3✔
388
        r.pauseHandler = fn
3✔
389
}
3✔
390

391
// Run executes the main loop based on configured mode.
392
func (r *Runner) Run(ctx context.Context) error {
105✔
393
        switch r.cfg.Mode {
105✔
394
        case ModeFull:
21✔
395
                return r.runFull(ctx)
21✔
396
        case ModeReview:
21✔
397
                return r.runReviewOnly(ctx)
21✔
398
        case ModeCodexOnly:
32✔
399
                return r.runCodexOnly(ctx)
32✔
400
        case ModeTasksOnly:
12✔
401
                return r.runTasksOnly(ctx)
12✔
402
        case ModePlan:
18✔
403
                return r.runPlanCreation(ctx)
18✔
404
        default:
1✔
405
                return fmt.Errorf("unknown mode: %s", r.cfg.Mode)
1✔
406
        }
407
}
408

409
// runFull executes the complete pipeline: tasks → review → codex → review.
410
func (r *Runner) runFull(ctx context.Context) error {
21✔
411
        if r.cfg.PlanFile == "" {
22✔
412
                return errors.New("plan file required for full mode")
1✔
413
        }
1✔
414
        if err := r.validatePlanHasTasks(); err != nil {
21✔
415
                return err
1✔
416
        }
1✔
417

418
        // phase 1: task execution
419
        r.phaseHolder.Set(status.PhaseTask)
19✔
420
        r.log.PrintRaw("starting task execution phase\n")
19✔
421

19✔
422
        if err := r.runTaskPhase(ctx); err != nil {
28✔
423
                if errors.Is(err, ErrUserAborted) {
10✔
424
                        r.log.Print("task phase aborted by user")
1✔
425
                        return ErrUserAborted
1✔
426
                }
1✔
427
                return fmt.Errorf("task phase: %w", err)
8✔
428
        }
429

430
        // phase 2: first review pass - address ALL findings
431
        r.phaseHolder.Set(status.PhaseReview)
10✔
432
        r.log.PrintSection(r.reviewSection(0, ": all findings"))
10✔
433

10✔
434
        if err := r.runReview(ctx, r.prependCodexReviewGuidance(r.replacePromptVariables(r.cfg.AppConfig.ReviewFirstPrompt)), "first review pass"); err != nil {
10✔
435
                return fmt.Errorf("first review: %w", err)
×
436
        }
×
437

438
        // phase 2.1: review loop (critical/major) before codex external review
439
        if err := r.runReviewLoop(ctx); err != nil {
10✔
440
                return fmt.Errorf("pre-codex review loop: %w", err)
×
441
        }
×
442

443
        // phase 2.5+3: codex → post-codex review → finalize
444
        if err := r.runCodexAndPostReview(ctx); err != nil {
10✔
445
                return err
×
446
        }
×
447

448
        r.log.Print("all phases completed successfully")
10✔
449
        return nil
10✔
450
}
451

452
// runReviewOnly executes only the review pipeline: review → codex → review.
453
func (r *Runner) runReviewOnly(ctx context.Context) error {
21✔
454
        // phase 1: first review
21✔
455
        r.phaseHolder.Set(status.PhaseReview)
21✔
456
        r.log.PrintSection(r.reviewSection(0, ": all findings"))
21✔
457

21✔
458
        if err := r.runReview(ctx, r.prependCodexReviewGuidance(r.replacePromptVariables(r.cfg.AppConfig.ReviewFirstPrompt)), "first review pass"); err != nil {
23✔
459
                return fmt.Errorf("first review: %w", err)
2✔
460
        }
2✔
461

462
        // phase 1.1: review loop (critical/major) before codex external review
463
        if err := r.runReviewLoop(ctx); err != nil {
20✔
464
                return fmt.Errorf("pre-codex review loop: %w", err)
1✔
465
        }
1✔
466

467
        // phase 2+3: codex → post-codex review → finalize
468
        if err := r.runCodexAndPostReview(ctx); err != nil {
21✔
469
                return err
3✔
470
        }
3✔
471

472
        r.log.Print("review phases completed successfully")
15✔
473
        return nil
15✔
474
}
475

476
// runCodexOnly executes only the codex pipeline: codex → review → finalize.
477
func (r *Runner) runCodexOnly(ctx context.Context) error {
32✔
478
        if err := r.runCodexAndPostReview(ctx); err != nil {
33✔
479
                return err
1✔
480
        }
1✔
481

482
        r.log.Print("codex phases completed successfully")
31✔
483
        return nil
31✔
484
}
485

486
// runCodexAndPostReview runs the shared codex → post-codex claude review → finalize pipeline.
487
// used by runFull, runReviewOnly, and runCodexOnly to avoid duplicating this sequence.
488
func (r *Runner) runCodexAndPostReview(ctx context.Context) error {
60✔
489
        // short-circuit when external review is disabled: skip the misleading
60✔
490
        // "codex external review" section header and codex phase marker entirely,
60✔
491
        // and proceed straight to finalize. without this, dashboards would show
60✔
492
        // a "codex external review" header immediately followed by
60✔
493
        // "external review disabled, skipping..." from runCodexLoop.
60✔
494
        if r.externalReviewTool() == "none" {
86✔
495
                r.log.Print("external review disabled, skipping...")
26✔
496
                return r.runFinalize(ctx)
26✔
497
        }
26✔
498

499
        // codex external review loop
500
        r.phaseHolder.Set(status.PhaseCodex)
34✔
501
        r.log.PrintSection(status.NewGenericSection("codex external review"))
34✔
502

34✔
503
        hadFindings, err := r.runCodexLoop(ctx)
34✔
504
        if err != nil {
37✔
505
                return fmt.Errorf("codex loop: %w", err)
3✔
506
        }
3✔
507

508
        // skip post-codex claude review when external review found nothing on the first pass.
509
        // the purpose of this review is to catch regressions from fixes applied during the external
510
        // review loop — if no findings were reported, no fixes were made and there's nothing to regress.
511
        if !hadFindings {
46✔
512
                r.log.Print("external review found no issues, skipping post-codex claude review")
15✔
513
                return r.runFinalize(ctx)
15✔
514
        }
15✔
515

516
        // claude review loop (critical/major) after codex.
517
        // prepend commit-pending instruction only when external review actually ran,
518
        // because the loop may exit early (max iterations, stalemate, manual break)
519
        // leaving uncommitted fixes in the worktree.
520
        r.phaseHolder.Set(status.PhaseReview)
16✔
521

16✔
522
        var commitPrefix string
16✔
523
        if r.externalReviewTool() != "none" {
32✔
524
                commitPrefix = "IMPORTANT: Before starting the review, run `git status`. " +
16✔
525
                        "If there are uncommitted changes from previous review phases, " +
16✔
526
                        "stage and commit them with message: " +
16✔
527
                        "`fix: address code review findings`\n" +
16✔
528
                        "Then continue with the sequence below.\n\n"
16✔
529
        }
16✔
530
        if err := r.runReviewLoop(ctx, commitPrefix); err != nil {
16✔
531
                return fmt.Errorf("post-codex review loop: %w", err)
×
532
        }
×
533

534
        // optional finalize step (best-effort, but propagates context cancellation)
535
        return r.runFinalize(ctx)
16✔
536
}
537

538
// runTasksOnly executes only task phase, skipping all reviews.
539
func (r *Runner) runTasksOnly(ctx context.Context) error {
12✔
540
        if r.cfg.PlanFile == "" {
13✔
541
                return errors.New("plan file required for tasks-only mode")
1✔
542
        }
1✔
543
        if err := r.validatePlanHasTasks(); err != nil {
12✔
544
                return err
1✔
545
        }
1✔
546

547
        r.phaseHolder.Set(status.PhaseTask)
10✔
548
        r.log.PrintRaw("starting task execution phase\n")
10✔
549

10✔
550
        if err := r.runTaskPhase(ctx); err != nil {
14✔
551
                if errors.Is(err, ErrUserAborted) {
7✔
552
                        r.log.Print("task phase aborted by user")
3✔
553
                        return ErrUserAborted
3✔
554
                }
3✔
555
                return fmt.Errorf("task phase: %w", err)
1✔
556
        }
557

558
        r.log.Print("task execution completed successfully")
6✔
559
        return nil
6✔
560
}
561

562
// runTaskPhase executes tasks until completion or max iterations.
563
// executes ONE Task section per iteration. supports break (Ctrl+\) with pause+resume:
564
// on break, the current session is canceled, pauseHandler is called, and on resume
565
// the same iteration re-runs with a fresh session that re-reads the plan file.
566
func (r *Runner) runTaskPhase(ctx context.Context) error {
29✔
567
        if r.taskPhaseOverride != nil {
31✔
568
                return r.taskPhaseOverride(ctx)
2✔
569
        }
2✔
570
        prompt := r.prependCodexTaskGuidance(r.replacePromptVariables(r.cfg.AppConfig.TaskPrompt))
27✔
571
        retryCount := 0
27✔
572

27✔
573
        for i := 1; i <= r.cfg.MaxIterations; i++ {
61✔
574
                select {
34✔
575
                case <-ctx.Done():
1✔
576
                        return fmt.Errorf("task phase: %w", ctx.Err())
1✔
577
                default:
33✔
578
                }
579

580
                // use plan task position instead of loop counter for correct dashboard highlighting
581
                taskNum := i
33✔
582
                if pos := r.nextPlanTaskPosition(); pos > 0 {
48✔
583
                        taskNum = pos
15✔
584
                }
15✔
585
                r.log.PrintSection(status.NewTaskIterationSection(taskNum))
33✔
586

33✔
587
                // create per-iteration break context so Ctrl+\ cancels only the current session
33✔
588
                loopCtx, loopCancel := r.breakContext(ctx)
33✔
589

33✔
590
                execName := r.executorName()
33✔
591
                result := r.runWithLimitRetry(loopCtx, r.task.Run, prompt, execName)
33✔
592

33✔
593
                // check break before calling loopCancel — cancel would make loopCtx.Err() non-nil
33✔
594
                manualBreak := r.isBreak(loopCtx, ctx)
33✔
595
                loopCancel()
33✔
596

33✔
597
                if manualBreak {
37✔
598
                        r.log.Print("session interrupted by break signal")
4✔
599
                        r.drainBreakCh() // clear signal that may have arrived during cancellation
4✔
600
                        if r.pauseHandler == nil || !r.pauseHandler(ctx) {
6✔
601
                                return ErrUserAborted
2✔
602
                        }
2✔
603
                        // resume: decrement i to preserve iteration budget and re-run same task
604
                        r.drainBreakCh() // clear any signal received during pause prompt
2✔
605
                        i--
2✔
606
                        retryCount = 0
2✔
607
                        continue
2✔
608
                }
609

610
                if result.Error != nil {
32✔
611
                        if err := r.handlePatternMatchError(result.Error, execName); err != nil {
5✔
612
                                return err
2✔
613
                        }
2✔
614
                        return fmt.Errorf("%s execution: %w", execName, result.Error)
1✔
615
                }
616

617
                if result.Signal == SignalCompleted {
42✔
618
                        // verify plan actually has no uncompleted checkboxes
16✔
619
                        if r.hasUncompletedTasks() {
16✔
620
                                r.log.Print("warning: completion signal received but plan still has [ ] items, continuing...")
×
621
                                continue
×
622
                        }
623
                        r.log.PrintRaw("\nall tasks completed, starting code review...\n")
16✔
624
                        return nil
16✔
625
                }
626

627
                if result.Signal == SignalFailed {
15✔
628
                        if retryCount < r.taskRetryCount {
7✔
629
                                r.log.Print("task failed, retrying...")
2✔
630
                                retryCount++
2✔
631
                                if err := r.sleepWithContext(ctx, r.iterationDelay); err != nil {
2✔
632
                                        return fmt.Errorf("interrupted: %w", err)
×
633
                                }
×
634
                                continue
2✔
635
                        }
636
                        return errors.New("task execution failed after retry (FAILED signal received)")
3✔
637
                }
638

639
                retryCount = 0
5✔
640
                // continue with same prompt - it reads from plan file each time
5✔
641
                if err := r.sleepWithContext(ctx, r.iterationDelay); err != nil {
6✔
642
                        return fmt.Errorf("interrupted: %w", err)
1✔
643
                }
1✔
644
        }
645

646
        return fmt.Errorf("max iterations (%d) reached without completion", r.cfg.MaxIterations)
1✔
647
}
648

649
// runReview runs the configured review executor with the given prompt until REVIEW_DONE.
650
// phaseLabel identifies the phase in error messages and the soft-warning log line
651
// (today only "first review pass" but parameterized so a future caller doesn't ship
652
// a misleading message).
653
func (r *Runner) runReview(ctx context.Context, prompt, phaseLabel string) error {
31✔
654
        execName := r.executorName()
31✔
655
        result := r.runWithLimitRetry(ctx, r.review.Run, prompt, execName)
31✔
656
        if result.Error != nil {
31✔
NEW
657
                if err := r.handlePatternMatchError(result.Error, execName); err != nil {
×
658
                        return err
×
659
                }
×
NEW
660
                return fmt.Errorf("%s execution: %w", execName, result.Error)
×
661
        }
662

663
        if result.Signal == SignalFailed {
32✔
664
                return errors.New("review failed (FAILED signal received)")
1✔
665
        }
1✔
666

667
        // session/idle timeout cleared result.Error and result.Signal inside runWithSessionTimeout.
668
        // under first-class --codex the comprehensive first review is the only place we can
669
        // catch findings — silently advancing on timeout drops them — so surface the timeout as
670
        // an error. claude-default mode keeps master's soft-warning + continue behavior so
671
        // existing users with --session-timeout / --idle-timeout don't see new run failures.
672
        if r.lastSessionTimedOut {
32✔
673
                if r.cfg.isCodexExecutor() {
3✔
674
                        return fmt.Errorf("%s timed out", phaseLabel)
1✔
675
                }
1✔
676
                r.log.Print("warning: %s did not complete cleanly (session timed out), continuing...", phaseLabel)
1✔
677
                return nil
1✔
678
        }
679

680
        if !isReviewDone(result.Signal) {
28✔
NEW
681
                r.log.Print("warning: %s did not complete cleanly, continuing...", phaseLabel)
×
UNCOV
682
        }
×
683

684
        return nil
28✔
685
}
686

687
// runReviewLoop runs review iterations using second review prompt.
688
// optional promptPrefix is prepended to the review prompt (used for commit-pending instruction after codex).
689
func (r *Runner) runReviewLoop(ctx context.Context, promptPrefix ...string) error {
45✔
690
        // review iterations = 10% of max_iterations
45✔
691
        maxReviewIterations := max(minReviewIterations, r.cfg.MaxIterations/reviewIterationDivisor)
45✔
692

45✔
693
        prefix := ""
45✔
694
        if len(promptPrefix) > 0 {
61✔
695
                prefix = promptPrefix[0]
16✔
696
        }
16✔
697

698
        execName := r.executorName()
45✔
699
        for i := 1; i <= maxReviewIterations; i++ {
97✔
700
                select {
52✔
701
                case <-ctx.Done():
×
702
                        return fmt.Errorf("review: %w", ctx.Err())
×
703
                default:
52✔
704
                }
705

706
                r.log.PrintSection(r.reviewSection(i, ": critical/major"))
52✔
707

52✔
708
                // capture HEAD hash before running review executor for no-commit detection
52✔
709
                headBefore := r.headHash()
52✔
710

52✔
711
                result := r.runWithLimitRetry(ctx, r.review.Run,
52✔
712
                        prefix+r.prependCodexReviewGuidance(r.replacePromptVariables(r.cfg.AppConfig.ReviewSecondPrompt)), execName)
52✔
713
                if result.Error != nil {
53✔
714
                        if err := r.handlePatternMatchError(result.Error, execName); err != nil {
2✔
715
                                return err
1✔
716
                        }
1✔
NEW
717
                        return fmt.Errorf("%s execution: %w", execName, result.Error)
×
718
                }
719

720
                if result.Signal == SignalFailed {
51✔
721
                        return errors.New("review failed (FAILED signal received)")
×
722
                }
×
723

724
                if isReviewDone(result.Signal) {
92✔
725
                        r.log.Print("%s review complete - no more findings", execName)
41✔
726
                        return nil
41✔
727
                }
41✔
728

729
                // on session timeout, skip HEAD check and retry; the session was killed before
730
                // it could finish, so "no changes" doesn't mean "nothing to fix"
731
                if r.lastSessionTimedOut {
12✔
732
                        r.log.Print("session timed out, retrying review iteration...")
2✔
733
                        continue
2✔
734
                }
735

736
                // fallback: if HEAD hash hasn't changed, the reviewer found nothing to fix
737
                if headBefore != "" {
10✔
738
                        if headAfter := r.headHash(); headAfter == headBefore {
3✔
739
                                r.log.Print("%s review complete - no changes detected", execName)
1✔
740
                                return nil
1✔
741
                        }
1✔
742
                }
743

744
                r.log.Print("issues fixed, running another review iteration...")
7✔
745
                if err := r.sleepWithContext(ctx, r.iterationDelay); err != nil {
7✔
746
                        return fmt.Errorf("interrupted: %w", err)
×
747
                }
×
748
        }
749

750
        r.log.Print("max %s review iterations reached, continuing...", execName)
2✔
751
        return nil
2✔
752
}
753

754
// headHash returns the current HEAD commit hash, or empty string if unavailable.
755
func (r *Runner) headHash() string {
74✔
756
        if r.git == nil {
119✔
757
                return ""
45✔
758
        }
45✔
759
        hash, err := r.git.HeadHash()
29✔
760
        if err != nil {
32✔
761
                r.log.Print("warning: failed to get HEAD hash: %v", err)
3✔
762
                return ""
3✔
763
        }
3✔
764
        return hash
26✔
765
}
766

767
// diffFingerprint returns a hash of the current working tree diff, or empty string if unavailable.
768
func (r *Runner) diffFingerprint() string {
20✔
769
        if r.git == nil {
23✔
770
                return ""
3✔
771
        }
3✔
772
        fp, err := r.git.DiffFingerprint()
17✔
773
        if err != nil {
17✔
774
                r.log.Print("warning: failed to get diff fingerprint: %v", err)
×
775
                return ""
×
776
        }
×
777
        return fp
17✔
778
}
779

780
// checkStalemate compares git state before and after claude evaluation to detect unchanged rounds.
781
// returns the updated unchanged round counter: incremented if no changes detected, reset to 0 otherwise.
782
// when diff fingerprints are unavailable (error), falls back to HEAD-only comparison.
783
func (r *Runner) checkStalemate(headBefore, headAfter, diffBefore, diffAfter string, unchangedRounds int) int {
6✔
784
        unchanged := headAfter == headBefore
6✔
785
        if diffBefore != "" && diffAfter != "" {
12✔
786
                unchanged = unchanged && diffAfter == diffBefore
6✔
787
        }
6✔
788
        if unchanged {
10✔
789
                return unchangedRounds + 1
4✔
790
        }
4✔
791
        return 0
2✔
792
}
793

794
// updateStalemate checks if review patience is enabled, computes the "after" git state,
795
// and returns the updated unchanged-rounds counter plus a flag indicating stalemate.
796
// skips the update if "after" values are empty (transient git error) to avoid resetting the counter.
797
func (r *Runner) updateStalemate(headBefore, diffBefore string, unchangedRounds int) (int, bool) {
26✔
798
        if r.cfg.ReviewPatience <= 0 || headBefore == "" {
46✔
799
                return unchangedRounds, false
20✔
800
        }
20✔
801
        // skip stalemate update if "after" values are empty (transient git error),
802
        // so errors don't reset unchangedRounds and inadvertently disable early exit
803
        if headAfter, diffAfter := r.headHash(), r.diffFingerprint(); headAfter != "" && diffAfter != "" {
12✔
804
                unchangedRounds = r.checkStalemate(headBefore, headAfter, diffBefore, diffAfter, unchangedRounds)
6✔
805
        }
6✔
806
        if unchangedRounds >= r.cfg.ReviewPatience {
7✔
807
                r.log.Print("stalemate detected after %d unchanged rounds, external review terminated early", unchangedRounds)
1✔
808
                return unchangedRounds, true
1✔
809
        }
1✔
810
        return unchangedRounds, false
5✔
811
}
812

813
// externalReviewTool returns the effective external review tool to use.
814
// an explicit ExternalReviewTool choice (e.g. via --external-review-tool) wins
815
// over codex_enabled=false back-compat; otherwise codex_enabled=false
816
// is treated as "none" so users with only that older setting still skip
817
// external review.
818
func (r *Runner) externalReviewTool() string {
110✔
819
        if r.cfg.ExternalReviewToolSet && r.cfg.AppConfig != nil && r.cfg.AppConfig.ExternalReviewTool != "" {
113✔
820
                return r.cfg.AppConfig.ExternalReviewTool
3✔
821
        }
3✔
822

823
        if !r.cfg.CodexEnabled {
130✔
824
                return "none"
23✔
825
        }
23✔
826

827
        if r.cfg.AppConfig != nil && r.cfg.AppConfig.ExternalReviewTool != "" {
168✔
828
                return r.cfg.AppConfig.ExternalReviewTool
84✔
829
        }
84✔
830

831
        return "codex"
×
832
}
833

834
// runCodexLoop runs the external review loop (codex or custom) until no findings.
835
func (r *Runner) runCodexLoop(ctx context.Context) (bool, error) {
34✔
836
        tool := r.externalReviewTool()
34✔
837

34✔
838
        // skip external review phase if disabled
34✔
839
        if tool == "none" {
34✔
UNCOV
840
                r.log.Print("external review disabled, skipping...")
×
UNCOV
841
                return false, nil
×
UNCOV
842
        }
×
843

844
        // custom review tool
845
        if tool == "custom" {
37✔
846
                if r.custom == nil {
4✔
847
                        return false, errors.New("custom review script not configured")
1✔
848
                }
1✔
849
                return r.runExternalReviewLoop(ctx, externalReviewConfig{
2✔
850
                        name:            "custom",
2✔
851
                        runReview:       func(ctx context.Context, prompt string) executor.Result { return r.custom.Run(ctx, prompt) },
4✔
852
                        buildPrompt:     r.buildCustomReviewPrompt,
853
                        buildEvalPrompt: r.buildCustomEvaluationPrompt,
854
                        showSummary:     func(string) {}, // no-op: custom output already streamed via OutputHandler
2✔
855
                        makeSection:     status.NewCustomIterationSection,
856
                })
857
        }
858

859
        // default: codex review
860
        return r.runExternalReviewLoop(ctx, externalReviewConfig{
31✔
861
                name:            "codex",
31✔
862
                runReview:       r.external.Run,
31✔
863
                buildPrompt:     r.buildCodexPrompt,
31✔
864
                buildEvalPrompt: r.buildCodexEvaluationPrompt,
31✔
865
                showSummary:     r.showCodexSummary,
31✔
866
                makeSection:     status.NewCodexIterationSection,
31✔
867
        })
31✔
868
}
869

870
// externalReviewConfig holds callbacks for running an external review tool.
// runExternalReviewLoop drives its review/evaluation loop purely through these callbacks,
// which lets the codex and custom tools share one loop implementation.
871
type externalReviewConfig struct {
872
        name            string                                                   // tool name used in log and error messages
873
        runReview       func(ctx context.Context, prompt string) executor.Result // run the external review tool
874
        buildPrompt     func(isFirst bool, claudeResponse string) string         // build prompt for review tool; isFirst stays true until a claude eval has completed
875
        buildEvalPrompt func(output string) string                               // build evaluation prompt for claude from the tool output
876
        showSummary     func(output string)                                      // display review findings summary before claude evaluation
877
        makeSection     func(iteration int) status.Section                       // create section header for the given iteration
878
}
879

880
// runExternalReviewLoop runs a generic external review tool-claude loop.
881
// it terminates when no findings remain, max iterations are reached,
882
// stalemate is detected (review patience), or a manual break is requested.
883
// returns true if findings were found, meaning claude evaluated external review output
884
// and did not signal CodexDone (i.e., there were actionable issues requiring fixes).
885
func (r *Runner) runExternalReviewLoop(ctx context.Context, cfg externalReviewConfig) (bool, error) {
33✔
886
        maxIterations := max(minCodexIterations, r.cfg.MaxIterations/codexIterationDivisor)
33✔
887
        if r.cfg.MaxExternalIterations > 0 {
40✔
888
                maxIterations = r.cfg.MaxExternalIterations
7✔
889
        }
7✔
890

891
        // derive a child context that cancels when break channel fires
892
        loopCtx, loopCancel := r.breakContext(ctx)
33✔
893
        defer loopCancel()
33✔
894

33✔
895
        var claudeResponse string // first iteration has no prior response
33✔
896
        var unchangedRounds int   // consecutive iterations with no commits (for stalemate detection)
33✔
897
        firstCompleted := false   // tracks if any successful eval completed; controls diff scope for external tool
33✔
898
        hadFindings := false      // tracks if external review found any issues requiring fixes
33✔
899

33✔
900
        for i := 1; i <= maxIterations; i++ {
92✔
901
                select {
59✔
902
                case <-loopCtx.Done():
×
903
                        if r.isBreak(loopCtx, ctx) {
×
904
                                r.log.Print("manual break requested, external review terminated early")
×
905
                                return hadFindings, nil
×
906
                        }
×
907
                        return hadFindings, fmt.Errorf("%s loop: %w", cfg.name, ctx.Err())
×
908
                default:
59✔
909
                }
910

911
                r.log.PrintSection(cfg.makeSection(i))
59✔
912

59✔
913
                // run external review tool. use branch-wide diff until a successful claude eval completes,
59✔
914
                // so that a timeout on the first eval doesn't narrow subsequent reviews to working-tree only
59✔
915
                reviewResult := r.runWithLimitRetry(loopCtx, cfg.runReview, cfg.buildPrompt(!firstCompleted, claudeResponse), cfg.name)
59✔
916
                if reviewResult.Error != nil {
62✔
917
                        if r.isBreak(loopCtx, ctx) {
4✔
918
                                r.log.Print("manual break requested, external review terminated early")
1✔
919
                                return hadFindings, nil
1✔
920
                        }
1✔
921
                        if err := r.handlePatternMatchError(reviewResult.Error, cfg.name); err != nil {
3✔
922
                                return hadFindings, err
1✔
923
                        }
1✔
924
                        return hadFindings, fmt.Errorf("%s execution: %w", cfg.name, reviewResult.Error)
1✔
925
                }
926

927
                // idle/session timeout on external review: partial or empty output can't be trusted as a clean review.
928
                // retry on the next iteration with a fresh session instead of treating it as a clean skip.
929
                if r.lastSessionTimedOut {
58✔
930
                        r.log.Print("%s review session timed out, retrying on next iteration...", cfg.name)
2✔
931
                        continue
2✔
932
                }
933

934
                if reviewResult.Output == "" {
58✔
935
                        r.log.Print("%s review returned no output, skipping...", cfg.name)
4✔
936
                        break
4✔
937
                }
938

939
                // show findings summary before Claude evaluation
940
                cfg.showSummary(reviewResult.Output)
50✔
941

50✔
942
                // capture state before claude evaluation for stalemate detection (only when enabled)
50✔
943
                var headBefore, diffBefore string
50✔
944
                if r.cfg.ReviewPatience > 0 {
64✔
945
                        headBefore = r.headHash()
14✔
946
                        diffBefore = r.diffFingerprint()
14✔
947
                }
14✔
948

949
                // pass output to claude for evaluation and fixing
950
                r.phaseHolder.Set(status.PhaseClaudeEval)
50✔
951
                r.log.PrintSection(status.NewClaudeEvalSection())
50✔
952
                claudeResult := r.runWithLimitRetry(loopCtx, r.review.Run, cfg.buildEvalPrompt(reviewResult.Output), "claude")
50✔
953

50✔
954
                // restore codex phase for next iteration
50✔
955
                r.phaseHolder.Set(status.PhaseCodex)
50✔
956
                if claudeResult.Error != nil {
50✔
957
                        if r.isBreak(loopCtx, ctx) {
×
958
                                r.log.Print("manual break requested, external review terminated early")
×
959
                                return hadFindings, nil
×
960
                        }
×
961
                        if err := r.handlePatternMatchError(claudeResult.Error, "claude"); err != nil {
×
962
                                return hadFindings, err
×
963
                        }
×
964
                        return hadFindings, fmt.Errorf("claude execution: %w", claudeResult.Error)
×
965
                }
966

967
                // on session timeout, skip response capture and stalemate detection; the session was killed
968
                // before it could finish, so partial output can't be trusted as previous context and
969
                // "no changes" doesn't mean "nothing to fix"
970
                if r.lastSessionTimedOut {
53✔
971
                        r.log.Print("claude eval session timed out, retrying %s iteration...", cfg.name)
3✔
972
                        continue
3✔
973
                }
974

975
                firstCompleted = true // successful eval completed, next iteration can use working-tree diff
47✔
976
                claudeResponse = claudeResult.Output
47✔
977

47✔
978
                // exit only when claude sees "no findings"
47✔
979
                if isCodexDone(claudeResult.Signal) {
68✔
980
                        r.log.Print("%s review complete - no more findings", cfg.name)
21✔
981
                        return hadFindings, nil
21✔
982
                }
21✔
983

984
                // findings were reported and need fixing — mark for post-codex review
985
                hadFindings = true
26✔
986

26✔
987
                // stalemate detection: track consecutive rounds with no changes (commits or working tree edits).
26✔
988
                // the eval prompt tells claude not to commit during fix rounds, so HEAD alone can't distinguish
26✔
989
                // "rejected findings" from "made fixes without commit". checking the diff fingerprint catches
26✔
990
                // working tree edits, making the detection accurate for both cases.
26✔
991
                var stalemate bool
26✔
992
                unchangedRounds, stalemate = r.updateStalemate(headBefore, diffBefore, unchangedRounds)
26✔
993
                if stalemate {
27✔
994
                        return hadFindings, nil
1✔
995
                }
1✔
996

997
                if err := r.sleepWithContext(loopCtx, r.iterationDelay); err != nil {
25✔
998
                        if r.isBreak(loopCtx, ctx) {
×
999
                                r.log.Print("manual break requested, external review terminated early")
×
1000
                                return hadFindings, nil
×
1001
                        }
×
1002
                        return hadFindings, fmt.Errorf("interrupted: %w", err)
×
1003
                }
1004
        }
1005

1006
        r.log.Print("max %s iterations reached, continuing to next phase...", cfg.name)
8✔
1007
        return hadFindings, nil
8✔
1008
}
1009

1010
// breakContext derives a child context that cancels when one value is drained from the break channel.
1011
// if no break channel is configured, returns the parent context and a no-op cancel.
1012
// callers detect break by checking loopCtx.Err() != nil && parentCtx.Err() == nil.
1013
func (r *Runner) breakContext(parent context.Context) (context.Context, context.CancelFunc) {
66✔
1014
        if r.breakCh == nil {
125✔
1015
                return parent, func() {}
118✔
1016
        }
1017
        ctx, cancel := context.WithCancel(parent)
7✔
1018
        go func() {
14✔
1019
                select {
7✔
1020
                case <-r.breakCh:
5✔
1021
                        cancel()
5✔
1022
                case <-ctx.Done():
2✔
1023
                }
1024
        }()
1025
        return ctx, cancel
7✔
1026
}
1027

1028
// isBreak returns true if the loop context was canceled by a break signal
1029
// while the parent context is still alive. does not read from the break channel,
1030
// so it can be called without consuming a pending signal.
1031
func (r *Runner) isBreak(loopCtx, parentCtx context.Context) bool {
36✔
1032
        return loopCtx.Err() != nil && parentCtx.Err() == nil
36✔
1033
}
36✔
1034

1035
// drainBreakCh does a non-blocking drain of one pending value from the break channel.
1036
// called after pause+resume to prevent a SIGQUIT received during the pause prompt
1037
// from immediately canceling the next iteration. not called on normal iteration
1038
// boundaries so that a legitimate Ctrl+\ between iterations is preserved.
1039
func (r *Runner) drainBreakCh() {
9✔
1040
        if r.breakCh == nil {
10✔
1041
                return
1✔
1042
        }
1✔
1043
        select {
8✔
1044
        case <-r.breakCh:
2✔
1045
        default:
6✔
1046
        }
1047
}
1048

1049
// buildCodexPrompt creates the prompt for codex review.
1050
// uses the codex_review prompt loaded from config with all variables expanded,
1051
// including {{PREVIOUS_REVIEW_CONTEXT}} for iteration context.
1052
func (r *Runner) buildCodexPrompt(isFirst bool, claudeResponse string) string {
65✔
1053
        return r.replaceVariablesWithIteration(r.cfg.AppConfig.CodexReviewPrompt, isFirst, claudeResponse)
65✔
1054
}
65✔
1055

1056
// validatePlanHasTasks returns an error if the plan file has no executable task sections.
1057
// guards against spec/reference docs that lack ### Task N: / ### Iteration N: headers,
1058
// which would otherwise cause the task loop to retry TASK_FAILED until exhaustion.
1059
// callers must ensure r.cfg.PlanFile is non-empty before invoking.
1060
func (r *Runner) validatePlanHasTasks() error {
40✔
1061
        path := r.resolvePlanFilePath()
40✔
1062
        p, err := plan.ParsePlanFile(path)
40✔
1063
        if err != nil {
41✔
1064
                return fmt.Errorf("parse plan for validation: %w", err)
1✔
1065
        }
1✔
1066
        if len(p.Tasks) == 0 {
45✔
1067
                return fmt.Errorf("plan file %q has no executable task sections (### Task N: or ### Iteration N:); add task sections or pass a different plan file", path)
6✔
1068
        }
6✔
1069
        return nil
33✔
1070
}
1071

1072
// hasUncompletedTasks checks if any Task section has uncompleted checkboxes.
1073
// only Task sections (### Task N: or ### Iteration N:) are considered.
1074
// checkboxes in Success criteria, Overview, or Context are ignored for this check,
1075
// so the agent can output ALL_TASKS_DONE when those are verification-only.
1076
// for malformed plans (checkboxes without task headers), returns true if any [ ] exists.
1077
// returns false if the plan file is missing after resolvePlanFilePath exhausts all probes
1078
// (original, <dir>/<alt-date-basename>, completed/<basename>, completed/<alt-date-basename>),
1079
// to avoid spinning the loop when an LLM-driven git mv renamed the file out from under
1080
// the runtime.
1081
func (r *Runner) hasUncompletedTasks() bool {
27✔
1082
        path := r.resolvePlanFilePath()
27✔
1083
        if path == "" {
27✔
1084
                return false // no plan file, nothing to complete
×
1085
        }
×
1086
        p, err := plan.ParsePlanFile(path)
27✔
1087
        if err != nil {
28✔
1088
                // last line of defense: resolvePlanFilePath has already tried the original path,
1✔
1089
                // the in-place alternate-date rename, completed/<basename>, and completed/<alt-date>.
1✔
1090
                // if the file is still missing, treat the run as complete so a vanished plan does
1✔
1091
                // not spin the loop.
1✔
1092
                if errors.Is(err, fs.ErrNotExist) {
2✔
1093
                        return false
1✔
1094
                }
1✔
1095
                r.log.Print("[WARN] failed to parse plan file for completion check: %v", err)
×
1096
                return true // assume incomplete if can't read
×
1097
        }
1098
        for _, t := range p.Tasks {
55✔
1099
                if t.HasUncompletedActionableWork() {
33✔
1100
                        return true
4✔
1101
                }
4✔
1102
        }
1103
        // malformed plans: no task headers but file has [ ] — treat as incomplete
1104
        if len(p.Tasks) == 0 {
24✔
1105
                has, err := plan.FileHasUncompletedCheckbox(path)
2✔
1106
                if err != nil {
2✔
1107
                        return true
×
1108
                }
×
1109
                if has {
3✔
1110
                        return true
1✔
1111
                }
1✔
1112
        }
1113
        return false
21✔
1114
}
1115

1116
// nextPlanTaskPosition returns the 1-indexed position of the first uncompleted task in the plan.
1117
// returns 0 if the plan file can't be read/parsed or no uncompleted tasks exist (caller falls back to loop counter).
1118
func (r *Runner) nextPlanTaskPosition() int {
43✔
1119
        p, err := plan.ParsePlanFile(r.resolvePlanFilePath())
43✔
1120
        if err != nil {
45✔
1121
                r.log.Print("[WARN] failed to parse plan file for task position: %v", err)
2✔
1122
                return 0
2✔
1123
        }
2✔
1124
        for i, t := range p.Tasks {
91✔
1125
                if t.HasUncompletedActionableWork() {
71✔
1126
                        return i + 1 // 1-indexed
21✔
1127
                }
21✔
1128
        }
1129
        return 0
20✔
1130
}
1131

1132
// showCodexSummary displays a condensed summary of codex output before Claude evaluation.
1133
// extracts text until first code block or maxCodexSummaryLen chars, whichever is shorter.
1134
func (r *Runner) showCodexSummary(output string) {
48✔
1135
        r.showExternalReviewSummary("codex", output)
48✔
1136
}
48✔
1137

1138
// showExternalReviewSummary displays a condensed summary of external review output.
1139
// extracts text until first code block or 5000 chars, whichever is shorter.
1140
func (r *Runner) showExternalReviewSummary(toolName, output string) {
48✔
1141
        summary := output
48✔
1142

48✔
1143
        // trim to first code block if present
48✔
1144
        if idx := strings.Index(summary, "```"); idx > 0 {
48✔
1145
                summary = summary[:idx]
×
1146
        }
×
1147

1148
        // limit to maxCodexSummaryLen runes to avoid splitting multi-byte characters
1149
        if runes := []rune(summary); len(runes) > maxCodexSummaryLen {
48✔
1150
                summary = string(runes[:maxCodexSummaryLen]) + "..."
×
1151
        }
×
1152

1153
        summary = strings.TrimSpace(summary)
48✔
1154
        if summary == "" {
48✔
1155
                return
×
1156
        }
×
1157

1158
        r.log.Print("%s findings:", toolName)
48✔
1159
        for line := range strings.SplitSeq(summary, "\n") {
96✔
1160
                if strings.TrimSpace(line) == "" {
48✔
1161
                        continue
×
1162
                }
1163
                r.log.PrintAligned("  " + line)
48✔
1164
        }
1165
}
1166

1167
// sentinel errors for user-initiated exits.
var (
	// ErrUserAborted is returned when the user aborts or declines to resume after a break
	// signal (Ctrl+\). it travels as a non-nil error so that callers (including mode
	// entrypoints) can detect it and treat it as a clean user-initiated exit, avoiding
	// further review/finalize steps.
	ErrUserAborted = errors.New("user aborted")

	// ErrUserRejectedPlan is returned when user rejects the plan draft.
	ErrUserRejectedPlan = errors.New("user rejected plan")
)
1174

1175
// draftReviewResult holds the result of draft review handling.
// it is what handlePlanDraft returns for one executor output.
1176
type draftReviewResult struct {
1177
        handled  bool   // true if a plan draft was found in the output (also set when collecting the review failed)
1178
        feedback string // revision feedback from the user (set only for the "revise" action)
1179
        err      error  // error if collecting the review failed, or ErrUserRejectedPlan when the user rejected
1180
}
1181

1182
// handlePlanDraft processes PLAN_DRAFT signal if present in output.
1183
// returns result indicating whether draft was handled and any feedback/errors.
1184
func (r *Runner) handlePlanDraft(ctx context.Context, output string) draftReviewResult {
16✔
1185
        planContent, draftErr := parsePlanDraftPayload(output)
16✔
1186
        if draftErr != nil {
25✔
1187
                // log malformed signals (but not "no signal" which is expected)
9✔
1188
                if !errors.Is(draftErr, errNoPlanDraftSignal) {
10✔
1189
                        r.log.Print("warning: %v", draftErr)
1✔
1190
                }
1✔
1191
                return draftReviewResult{handled: false}
9✔
1192
        }
1193

1194
        r.log.Print("plan draft ready for review")
7✔
1195

7✔
1196
        action, feedback, askErr := r.inputCollector.AskDraftReview(ctx, "Review the plan draft", planContent)
7✔
1197
        if askErr != nil {
8✔
1198
                return draftReviewResult{handled: true, err: fmt.Errorf("collect draft review: %w", askErr)}
1✔
1199
        }
1✔
1200

1201
        // log the draft review action and feedback to progress file
1202
        r.log.LogDraftReview(action, feedback)
6✔
1203

6✔
1204
        switch action {
6✔
1205
        case "accept":
3✔
1206
                r.log.Print("draft accepted, continuing to write plan file...")
3✔
1207
                return draftReviewResult{handled: true}
3✔
1208
        case "revise":
2✔
1209
                r.log.Print("revision requested, re-running with feedback...")
2✔
1210
                return draftReviewResult{handled: true, feedback: feedback}
2✔
1211
        case "reject":
1✔
1212
                r.log.Print("plan rejected by user")
1✔
1213
                return draftReviewResult{handled: true, err: ErrUserRejectedPlan}
1✔
1214
        }
1215

1216
        return draftReviewResult{handled: true}
×
1217
}
1218

1219
// handlePlanQuestion processes QUESTION signal if present in output.
1220
// returns true if question was found and handled, false otherwise.
1221
// returns error if question handling failed.
1222
func (r *Runner) handlePlanQuestion(ctx context.Context, output string) (bool, error) {
9✔
1223
        question, err := parseQuestionPayload(output)
9✔
1224
        if err != nil {
15✔
1225
                // log malformed signals (but not "no signal" which is expected)
6✔
1226
                if !errors.Is(err, errNoQuestionSignal) {
6✔
1227
                        r.log.Print("warning: %v", err)
×
1228
                }
×
1229
                return false, nil
6✔
1230
        }
1231

1232
        r.log.LogQuestion(question.Question, question.Options)
3✔
1233

3✔
1234
        answer, askErr := r.inputCollector.AskQuestion(ctx, question.Question, question.Options)
3✔
1235
        if askErr != nil {
4✔
1236
                return true, fmt.Errorf("collect answer: %w", askErr)
1✔
1237
        }
1✔
1238

1239
        r.log.LogAnswer(answer)
2✔
1240
        return true, nil
2✔
1241
}
1242

1243
// runPlanCreation executes the interactive plan creation loop.
1244
// the loop continues until PLAN_READY signal or max iterations reached.
1245
// handles QUESTION signals for Q&A and PLAN_DRAFT signals for draft review.
1246
func (r *Runner) runPlanCreation(ctx context.Context) error {
18✔
1247
        if r.cfg.PlanDescription == "" {
19✔
1248
                return errors.New("plan description required for plan mode")
1✔
1249
        }
1✔
1250
        if r.inputCollector == nil {
18✔
1251
                return errors.New("input collector required for plan mode")
1✔
1252
        }
1✔
1253

1254
        r.phaseHolder.Set(status.PhasePlan)
16✔
1255
        r.log.PrintRaw("starting interactive plan creation\n")
16✔
1256
        r.log.Print("plan request: %s", r.cfg.PlanDescription)
16✔
1257

16✔
1258
        // plan iterations use 20% of max_iterations
16✔
1259
        maxPlanIterations := max(minPlanIterations, r.cfg.MaxIterations/planIterationDivisor)
16✔
1260

16✔
1261
        // track revision feedback for context in next iteration
16✔
1262
        var lastRevisionFeedback string
16✔
1263

16✔
1264
        for i := 1; i <= maxPlanIterations; i++ {
46✔
1265
                select {
30✔
1266
                case <-ctx.Done():
1✔
1267
                        return fmt.Errorf("plan creation: %w", ctx.Err())
1✔
1268
                default:
29✔
1269
                }
1270

1271
                r.log.PrintSection(status.NewPlanIterationSection(i))
29✔
1272

29✔
1273
                prompt := r.buildPlanPrompt()
29✔
1274
                // append revision feedback context if present
29✔
1275
                hadFeedback := lastRevisionFeedback != ""
29✔
1276
                if hadFeedback {
32✔
1277
                        prompt = fmt.Sprintf("%s\n\n---\nPREVIOUS DRAFT FEEDBACK:\nUser requested revisions with this feedback:\n%s\n\nPlease revise the plan accordingly and present a new PLAN_DRAFT.", prompt, lastRevisionFeedback)
3✔
1278
                }
3✔
1279

1280
                execName := r.executorName()
29✔
1281
                result := r.runWithLimitRetry(ctx, r.task.Run, prompt, execName)
29✔
1282
                if result.Error != nil {
31✔
1283
                        if err := r.handlePatternMatchError(result.Error, execName); err != nil {
3✔
1284
                                return err
1✔
1285
                        }
1✔
1286
                        return fmt.Errorf("%s execution: %w", execName, result.Error)
1✔
1287
                }
1288

1289
                if result.Signal == SignalFailed {
28✔
1290
                        return errors.New("plan creation failed (FAILED signal received)")
1✔
1291
                }
1✔
1292

1293
                // check for PLAN_READY signal
1294
                if isPlanReady(result.Signal) {
34✔
1295
                        r.log.Print("plan creation completed")
8✔
1296
                        return nil
8✔
1297
                }
8✔
1298

1299
                // on session timeout, skip output parsing and retry; the session was killed before
1300
                // it could finish, so partial output may contain truncated PLAN_DRAFT or QUESTION markers.
1301
                // preserve lastRevisionFeedback so the next attempt re-sends the user's revision request
1302
                if r.lastSessionTimedOut {
20✔
1303
                        r.log.Print("plan creation session timed out, retrying iteration...")
2✔
1304
                        if err := r.sleepWithContext(ctx, r.iterationDelay); err != nil {
2✔
1305
                                return fmt.Errorf("interrupted: %w", err)
×
1306
                        }
×
1307
                        continue
2✔
1308
                }
1309

1310
                // session completed successfully, clear revision feedback since it was consumed
1311
                if hadFeedback {
17✔
1312
                        lastRevisionFeedback = ""
1✔
1313
                }
1✔
1314

1315
                // check for PLAN_DRAFT signal - present draft for user review
1316
                draftResult := r.handlePlanDraft(ctx, result.Output)
16✔
1317
                if draftResult.err != nil {
18✔
1318
                        return draftResult.err
2✔
1319
                }
2✔
1320
                if draftResult.handled {
19✔
1321
                        lastRevisionFeedback = draftResult.feedback
5✔
1322
                        if err := r.sleepWithContext(ctx, r.iterationDelay); err != nil {
5✔
1323
                                return fmt.Errorf("interrupted: %w", err)
×
1324
                        }
×
1325
                        continue
5✔
1326
                }
1327

1328
                // check for QUESTION signal
1329
                handled, err := r.handlePlanQuestion(ctx, result.Output)
9✔
1330
                if err != nil {
10✔
1331
                        return err
1✔
1332
                }
1✔
1333
                if handled {
10✔
1334
                        if err := r.sleepWithContext(ctx, r.iterationDelay); err != nil {
2✔
1335
                                return fmt.Errorf("interrupted: %w", err)
×
1336
                        }
×
1337
                        continue
2✔
1338
                }
1339

1340
                // no question, no draft, and no completion - continue
1341
                if err := r.sleepWithContext(ctx, r.iterationDelay); err != nil {
6✔
1342
                        return fmt.Errorf("interrupted: %w", err)
×
1343
                }
×
1344
        }
1345

1346
        return fmt.Errorf("max plan iterations (%d) reached without completion", maxPlanIterations)
1✔
1347
}
1348

1349
// handlePatternMatchError checks if err is a PatternMatchError or LimitPatternError and logs appropriate messages.
1350
// Returns the error if it's a pattern match (to trigger graceful exit), nil otherwise.
1351
func (r *Runner) handlePatternMatchError(err error, tool string) error {
10✔
1352
        var patternErr *executor.PatternMatchError
10✔
1353
        if errors.As(err, &patternErr) {
14✔
1354
                r.log.Print("error: detected %q in %s output", patternErr.Pattern, tool)
4✔
1355
                r.log.Print("run '%s' for more information", patternErr.HelpCmd)
4✔
1356
                return err
4✔
1357
        }
4✔
1358
        var limitErr *executor.LimitPatternError
6✔
1359
        if errors.As(err, &limitErr) {
8✔
1360
                r.log.Print("error: detected %q in %s output", limitErr.Pattern, tool)
2✔
1361
                r.log.Print("run '%s' for more information", limitErr.HelpCmd)
2✔
1362
                return err
2✔
1363
        }
2✔
1364
        return nil
4✔
1365
}
1366

1367
// runWithLimitRetry wraps an executor Run() call with rate limit retry logic and optional session timeout.
1368
// if the result contains a LimitPatternError and waitOnLimit > 0, it logs a message, waits, and retries.
1369
// if waitOnLimit == 0, the LimitPatternError is returned as-is (existing exit behavior).
1370
// other errors (including PatternMatchError) are returned without retry.
1371
// when SessionTimeout > 0, each run() call gets a child context with deadline.
1372
// on session timeout (child timed out but parent alive), logs a warning and returns result with error cleared.
1373
// retries indefinitely until success or context cancellation.
1374
func (r *Runner) runWithLimitRetry(ctx context.Context, run func(context.Context, string) executor.Result,
1375
        prompt, toolName string) executor.Result {
272✔
1376
        for {
549✔
1377
                result := r.runWithSessionTimeout(ctx, run, prompt, toolName)
277✔
1378
                if result.Error == nil {
530✔
1379
                        return result
253✔
1380
                }
253✔
1381

1382
                var limitErr *executor.LimitPatternError
24✔
1383
                if !errors.As(result.Error, &limitErr) {
39✔
1384
                        return result // not a limit error, return as-is
15✔
1385
                }
15✔
1386

1387
                if r.waitOnLimit <= 0 {
12✔
1388
                        return result // no wait configured, return limit error as-is
3✔
1389
                }
3✔
1390

1391
                r.log.Print("rate limit detected: %q in %s output, waiting %s before retry...",
6✔
1392
                        limitErr.Pattern, toolName, r.waitOnLimit)
6✔
1393

6✔
1394
                if err := r.sleepWithContext(ctx, r.waitOnLimit); err != nil {
7✔
1395
                        return executor.Result{Error: fmt.Errorf("interrupted during limit wait: %w", ctx.Err())}
1✔
1396
                }
1✔
1397
        }
1398
}
1399

1400
// runWithSessionTimeout runs the executor with an optional session timeout.
1401
// when SessionTimeout > 0, wraps ctx with context.WithTimeout before calling run.
1402
// on session timeout (child timed out but parent alive), logs a warning and clears the error
1403
// so callers treat it as a non-completing iteration that continues naturally.
1404
// applies to: claude calls in default executor mode (any toolName=="claude" call), every executor call under
1405
// --codex (task, review, finalize, evaluation). external codex/custom review in default executor mode is
1406
// NOT subject to session_timeout — preserves existing behavior. toolName is used
1407
// for both gating and log message phrasing.
1408
func (r *Runner) runWithSessionTimeout(ctx context.Context, run func(context.Context, string) executor.Result,
1409
        prompt, toolName string) executor.Result {
289✔
1410
        r.lastSessionTimedOut = false
289✔
1411
        sessionTimeout := r.sessionTimeout()
289✔
1412
        codexMode := r.cfg.isCodexExecutor()
289✔
1413
        useTimeout := sessionTimeout > 0 && (codexMode || toolName == "claude")
289✔
1414

289✔
1415
        if !useTimeout {
552✔
1416
                result := run(ctx, prompt)
263✔
1417
                r.handleIdleTimeout(result, toolName)
263✔
1418
                return result
263✔
1419
        }
263✔
1420

1421
        childCtx, cancel := context.WithTimeout(ctx, sessionTimeout)
26✔
1422
        defer cancel()
26✔
1423

26✔
1424
        result := run(childCtx, prompt)
26✔
1425

26✔
1426
        // check if this was a session timeout: child context expired but parent is still alive.
26✔
1427
        // clear the error so callers (task loop, review loop) treat it as a non-completing iteration
26✔
1428
        // rather than aborting the phase. set lastSessionTimedOut so review loops can distinguish
26✔
1429
        // timeout from "genuinely found nothing" and continue instead of exiting.
26✔
1430
        if childCtx.Err() != nil && ctx.Err() == nil {
38✔
1431
                r.log.Print("warning: %s session timed out after %s, the agent may have started a blocking operation",
12✔
1432
                        toolName, sessionTimeout)
12✔
1433
                result.Error = nil
12✔
1434
                result.Signal = "" // clear any signal emitted before timeout; can't trust partial session
12✔
1435
                r.lastSessionTimedOut = true
12✔
1436
                return result
12✔
1437
        }
12✔
1438

1439
        r.handleIdleTimeout(result, toolName)
14✔
1440
        return result
14✔
1441
}
1442

1443
// handleIdleTimeout logs the idle-timeout diagnostic and flags it as a session-timeout
1444
// equivalent for review loops. an idle timeout without a signal looks like "nothing to
1445
// fix", so callers must distinguish it from a clean completion or they exit prematurely.
1446
func (r *Runner) handleIdleTimeout(result executor.Result, toolName string) {
277✔
1447
        if result.IdleTimedOut && result.Signal == "" {
282✔
1448
                r.log.Print("warning: %s session idle timed out, no output activity detected", toolName)
5✔
1449
                r.lastSessionTimedOut = true
5✔
1450
        }
5✔
1451
}
1452

1453
// sessionTimeout returns the configured session timeout duration.
1454
// returns 0 if not configured or AppConfig is nil.
1455
func (r *Runner) sessionTimeout() time.Duration {
289✔
1456
        if r.cfg.AppConfig == nil {
289✔
1457
                return 0
×
1458
        }
×
1459
        return r.cfg.AppConfig.SessionTimeout
289✔
1460
}
1461

1462
// executorName returns the name used in user-facing log messages and pattern-error
1463
// help text for the configured task/review executor. "codex" when --codex is in
1464
// effect, "claude" otherwise. external review uses its own label.
1465
func (r *Runner) executorName() string {
149✔
1466
        if r.cfg.isCodexExecutor() {
162✔
1467
                return "codex"
13✔
1468
        }
13✔
1469
        return "claude"
136✔
1470
}
1471

1472
func (r *Runner) reviewSection(iteration int, suffix string) status.Section {
83✔
1473
        if r.cfg.isCodexExecutor() {
92✔
1474
                return status.NewInternalReviewSection(iteration, suffix)
9✔
1475
        }
9✔
1476
        return status.NewClaudeReviewSection(iteration, suffix)
74✔
1477
}
1478

1479
// runFinalize executes the optional finalize step after successful reviews.
1480
// runs once, best-effort: failures are logged but don't block success.
1481
// exception: context cancellation is propagated (user wants to abort).
1482
func (r *Runner) runFinalize(ctx context.Context) error {
57✔
1483
        if !r.cfg.FinalizeEnabled {
103✔
1484
                return nil
46✔
1485
        }
46✔
1486

1487
        r.phaseHolder.Set(status.PhaseFinalize)
11✔
1488
        r.log.PrintSection(status.NewGenericSection("finalize step"))
11✔
1489

11✔
1490
        prompt := r.replacePromptVariables(r.cfg.AppConfig.FinalizePrompt)
11✔
1491
        execName := r.executorName()
11✔
1492
        result := r.runWithLimitRetry(ctx, r.review.Run, prompt, execName)
11✔
1493

11✔
1494
        if result.Error != nil {
14✔
1495
                // propagate context cancellation - user wants to abort
3✔
1496
                if errors.Is(result.Error, context.Canceled) || errors.Is(result.Error, context.DeadlineExceeded) {
4✔
1497
                        return fmt.Errorf("finalize step: %w", result.Error)
1✔
1498
                }
1✔
1499
                // pattern match (rate limit or error) - log via shared helper, but don't fail (best-effort)
1500
                if r.handlePatternMatchError(result.Error, execName) != nil {
3✔
1501
                        return nil //nolint:nilerr // intentional: best-effort semantics, log but don't propagate
1✔
1502
                }
1✔
1503
                // best-effort: log error but don't fail
1504
                r.log.Print("finalize step failed: %v", result.Error)
1✔
1505
                return nil
1✔
1506
        }
1507

1508
        if result.Signal == SignalFailed {
9✔
1509
                r.log.Print("finalize step reported failure (non-blocking)")
1✔
1510
                return nil
1✔
1511
        }
1✔
1512

1513
        r.log.Print("finalize step completed")
7✔
1514
        return nil
7✔
1515
}
1516

1517
// sleepWithContext pauses for the given duration but returns immediately if context is canceled.
1518
// returns ctx.Err() on cancellation, nil on normal completion.
1519
func (r *Runner) sleepWithContext(ctx context.Context, d time.Duration) error {
60✔
1520
        t := time.NewTimer(d)
60✔
1521
        defer t.Stop()
60✔
1522
        select {
60✔
1523
        case <-t.C:
58✔
1524
                return nil
58✔
1525
        case <-ctx.Done():
2✔
1526
                return fmt.Errorf("sleep interrupted: %w", ctx.Err())
2✔
1527
        }
1528
}
1529

1530
// needsCodexBinary returns true when external codex review needs the codex binary.
1531
// first-class codex executor dependency checks happen in cmd/ralphex before runner construction.
1532
func needsCodexBinary(appConfig *config.Config) bool {
3✔
1533
        if appConfig == nil {
3✔
1534
                return true // default behavior assumes codex
×
1535
        }
×
1536
        switch appConfig.ExternalReviewTool {
3✔
1537
        case "custom", "none":
2✔
1538
                return false
2✔
1539
        default:
1✔
1540
                return true // "codex" or empty (default) requires codex binary
1✔
1541
        }
1542
}
1543

1544
// ParseModelEffort splits a "model[:effort]" spec at its first colon.
// Used by New to parse task_model/review_model config values into the
// ClaudeExecutor.Model and ClaudeExecutor.Effort fields.
// Without a colon the whole input is the model and effort is empty, so "" yields ("", "").
// Either side of the colon may be empty: ":high" yields ("", "high"), "opus:" yields ("opus", "").
// Later colons are not separators; they stay in effort ("a:b:c" yields ("a", "b:c")).
func ParseModelEffort(s string) (model, effort string) {
	sep := strings.IndexByte(s, ':')
	if sep < 0 {
		return s, ""
	}
	return s[:sep], s[sep+1:]
}
31✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc