21302158116

Committed 23 Jan 2026 09:42PM UTC coverage: 79.365% (-0.03%) from 79.395%

Build # 21302158116

Build Type

Pull #17

github

Committed by

melonamin

Commit Message

refactor: simplify values.go WatchDirs to minimal inline parsing

Reverted helper functions and constants that were out of scope for
WatchDirs feature. Now uses same GetKey inline pattern as existing
config parsing code.

Pull Request #17: feat: add web dashboard with real-time streaming and multi-session support

Run Details

1478 of 1874 new or added lines in 19 files covered. (78.87%)

9 existing lines in 2 files now uncovered.

3077 of 3877 relevant lines covered (79.37%)

223.36 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

73.21

/pkg/processor/runner.go

// Package processor provides the main orchestration loop for ralphex execution.
package processor

import (
        "context"
        "errors"
        "fmt"
        "os"
        "path/filepath"
        "strings"
        "time"

        "github.com/umputun/ralphex/pkg/config"
        "github.com/umputun/ralphex/pkg/executor"
)

// DefaultIterationDelay is the pause between iterations to allow system to settle.
// NewWithExecutors falls back to it whenever Config.IterationDelayMs is not positive.
const DefaultIterationDelay = 2 * time.Second

// Mode represents the execution mode.
// Runner.Run dispatches on it and returns an "unknown mode" error for any value
// other than the constants declared below.
type Mode string

// Supported execution modes.
const (
        ModeFull      Mode = "full"       // full execution: tasks + reviews + codex
        ModeReview    Mode = "review"     // skip tasks, run full review pipeline
        ModeCodexOnly Mode = "codex-only" // skip tasks and first review, run only codex loop
)

// Config holds runner configuration.
//
// Notes on how the fields are consumed in this file:
//   - MaxIterations bounds the task phase; the claude review loop and the codex loop
//     derive their own caps from it (MaxIterations/10 and MaxIterations/5, each at least 3).
//   - IterationDelayMs values <= 0 fall back to DefaultIterationDelay.
//   - TaskRetryCount is honored when it is > 0, or unconditionally (even 0) when
//     AppConfig.TaskRetryCountSet is true; otherwise one retry is used.
//   - AppConfig may be nil, in which case executors keep their own defaults.
type Config struct {
        PlanFile         string         // path to plan file (required for full mode)
        ProgressPath     string         // path to progress file
        Mode             Mode           // execution mode
        MaxIterations    int            // maximum iterations for task phase
        Debug            bool           // enable debug output
        NoColor          bool           // disable color output
        IterationDelayMs int            // delay between iterations in milliseconds
        TaskRetryCount   int            // number of times to retry failed tasks
        CodexEnabled     bool           // whether codex review is enabled
        AppConfig        *config.Config // full application config (for executors and prompts)
}

//go:generate moq -out mocks/executor.go -pkg mocks -skip-ensure -fmt goimports . Executor
//go:generate moq -out mocks/logger.go -pkg mocks -skip-ensure -fmt goimports . Logger

// Executor runs CLI commands and returns results.
// The Runner holds two of them: one for claude and one for codex.
type Executor interface {
        // Run executes the given prompt and reports the outcome as an executor.Result;
        // the Runner inspects its Error, Signal and Output fields.
        Run(ctx context.Context, prompt string) executor.Result
}

// Logger provides logging functionality.
// It is the output sink for the Runner itself and for the executors built in New,
// which forward their output to PrintAligned.
type Logger interface {
        // SetPhase is called by the Runner whenever it moves to another phase.
        SetPhase(phase Phase)
        // Print writes a formatted message.
        Print(format string, args ...any)
        // PrintRaw writes a formatted message; callers in this file pass explicit "\n"
        // characters in the format — presumably no extra decoration is added, confirm in the implementation.
        PrintRaw(format string, args ...any)
        // PrintSection announces a new section (iteration or phase header).
        PrintSection(section Section)
        // PrintAligned writes a single piece of text; used for executor output and codex summaries.
        PrintAligned(text string)
        // Path is not called in this file — presumably returns the log/progress file location; confirm.
        Path() string
}

// Runner orchestrates the execution loop.
// Create it with New (real executors) or NewWithExecutors (custom executors).
type Runner struct {
        cfg            Config        // configuration as passed to the constructor
        log            Logger        // sink for all progress output
        claude         Executor      // runs task, review and codex-evaluation prompts
        codex          Executor      // runs the external codex review prompt
        iterationDelay time.Duration // pause between loop iterations
        taskRetryCount int           // consecutive FAILED signals tolerated in the task phase
}

// New creates a Runner wired to the real claude and codex executors.
// Both executors forward their output to log.PrintAligned and inherit cfg.Debug;
// command-level settings are copied from cfg.AppConfig when it is provided.
func New(cfg Config, log Logger) *Runner {
        // executor output is routed to the logger's aligned printer
        forward := func(text string) {
                log.PrintAligned(text)
        }

        claudeExec := &executor.ClaudeExecutor{OutputHandler: forward, Debug: cfg.Debug}
        codexExec := &executor.CodexExecutor{OutputHandler: forward, Debug: cfg.Debug}

        // without an application config the executors keep their zero-value settings
        if app := cfg.AppConfig; app != nil {
                claudeExec.Command = app.ClaudeCommand
                claudeExec.Args = app.ClaudeArgs

                codexExec.Command = app.CodexCommand
                codexExec.Model = app.CodexModel
                codexExec.ReasoningEffort = app.CodexReasoningEffort
                codexExec.TimeoutMs = app.CodexTimeoutMs
                codexExec.Sandbox = app.CodexSandbox
        }

        return NewWithExecutors(cfg, log, claudeExec, codexExec)
}

// NewWithExecutors creates a new Runner with custom executors (for testing).
// The iteration delay defaults to DefaultIterationDelay and the task retry count to 1;
// both are overridden from cfg as described below.
func NewWithExecutors(cfg Config, log Logger, claude, codex Executor) *Runner {
        runner := &Runner{
                cfg:            cfg,
                log:            log,
                claude:         claude,
                codex:          codex,
                iterationDelay: DefaultIterationDelay,
                taskRetryCount: 1,
        }

        // a positive millisecond value replaces the default delay
        if ms := cfg.IterationDelayMs; ms > 0 {
                runner.iterationDelay = time.Duration(ms) * time.Millisecond
        }

        // TaskRetryCountSet means the user chose the value explicitly (even 0 for no retries);
        // otherwise only a positive count overrides the default
        explicit := cfg.AppConfig != nil && cfg.AppConfig.TaskRetryCountSet
        if explicit || cfg.TaskRetryCount > 0 {
                runner.taskRetryCount = cfg.TaskRetryCount
        }

        return runner
}

// Run picks the pipeline matching the configured mode and executes it.
// An unrecognized mode yields an "unknown mode" error without running anything.
func (r *Runner) Run(ctx context.Context) error {
        var pipeline func(context.Context) error

        switch mode := r.cfg.Mode; mode {
        case ModeFull:
                pipeline = r.runFull
        case ModeReview:
                pipeline = r.runReviewOnly
        case ModeCodexOnly:
                pipeline = r.runCodexOnly
        default:
                return fmt.Errorf("unknown mode: %s", mode)
        }

        return pipeline(ctx)
}

// runFull executes the complete pipeline: tasks → review → codex → review.
// It requires cfg.PlanFile to be set and stops at the first failing phase,
// returning that phase's error wrapped with the phase name.
func (r *Runner) runFull(ctx context.Context) error {
        // full mode works off the plan file, so refuse to start without one
        if r.cfg.PlanFile == "" {
                return errors.New("plan file required for full mode")
        }

        // phase 1: task execution
        r.log.SetPhase(PhaseTask)
        r.log.PrintRaw("starting task execution phase\n")

        if err := r.runTaskPhase(ctx); err != nil {
                return fmt.Errorf("task phase: %w", err)
        }

        // phase 2: first review pass - address ALL findings
        r.log.SetPhase(PhaseReview)
        r.log.PrintSection(NewGenericSection("claude review 0: all findings"))

        if err := r.runClaudeReview(ctx, r.buildFirstReviewPrompt()); err != nil {
                return fmt.Errorf("first review: %w", err)
        }

        // phase 2.1: claude review loop (critical/major) before codex
        if err := r.runClaudeReviewLoop(ctx); err != nil {
                return fmt.Errorf("pre-codex review loop: %w", err)
        }

        // phase 2.5: codex external review loop
        // (runCodexLoop returns immediately when codex is disabled in the config)
        r.log.SetPhase(PhaseCodex)
        r.log.PrintSection(NewGenericSection("codex external review"))

        if err := r.runCodexLoop(ctx); err != nil {
                return fmt.Errorf("codex loop: %w", err)
        }

        // phase 3: claude review loop (critical/major) after codex
        r.log.SetPhase(PhaseReview)

        if err := r.runClaudeReviewLoop(ctx); err != nil {
                return fmt.Errorf("post-codex review loop: %w", err)
        }

        r.log.Print("all phases completed successfully")
        return nil
}

// runReviewOnly executes only the review pipeline: review → codex → review.
// No plan file is required and the task phase is skipped entirely. It stops at the
// first failing phase, returning that phase's error wrapped with the phase name.
func (r *Runner) runReviewOnly(ctx context.Context) error {
        // phase 1: first review
        r.log.SetPhase(PhaseReview)
        r.log.PrintSection(NewGenericSection("claude review 0: all findings"))

        if err := r.runClaudeReview(ctx, r.buildFirstReviewPrompt()); err != nil {
                return fmt.Errorf("first review: %w", err)
        }

        // phase 1.1: claude review loop (critical/major) before codex
        if err := r.runClaudeReviewLoop(ctx); err != nil {
                return fmt.Errorf("pre-codex review loop: %w", err)
        }

        // phase 2: codex external review loop
        // (runCodexLoop returns immediately when codex is disabled in the config)
        r.log.SetPhase(PhaseCodex)
        r.log.PrintSection(NewGenericSection("codex external review"))

        if err := r.runCodexLoop(ctx); err != nil {
                return fmt.Errorf("codex loop: %w", err)
        }

        // phase 3: claude review loop (critical/major) after codex
        r.log.SetPhase(PhaseReview)

        if err := r.runClaudeReviewLoop(ctx); err != nil {
                return fmt.Errorf("post-codex review loop: %w", err)
        }

        r.log.Print("review phases completed successfully")
        return nil
}

// runCodexOnly executes only the codex pipeline: codex → review.
// Both the task phase and the first claude review are skipped. It stops at the
// first failing phase, returning that phase's error wrapped with the phase name.
func (r *Runner) runCodexOnly(ctx context.Context) error {
        // phase 1: codex external review loop
        // (runCodexLoop returns immediately when codex is disabled in the config)
        r.log.SetPhase(PhaseCodex)
        r.log.PrintSection(NewGenericSection("codex external review"))

        if err := r.runCodexLoop(ctx); err != nil {
                return fmt.Errorf("codex loop: %w", err)
        }

        // phase 2: claude review loop (critical/major) after codex
        r.log.SetPhase(PhaseReview)

        if err := r.runClaudeReviewLoop(ctx); err != nil {
                return fmt.Errorf("post-codex review loop: %w", err)
        }

        r.log.Print("codex phases completed successfully")
        return nil
}

// runTaskPhase executes tasks until completion or max iterations.
// executes ONE Task section per iteration.
//
// Each iteration sends the same task prompt to claude and reacts to the returned signal:
//   - completed: returns nil, unless the plan file still has unchecked items, in which
//     case the loop continues with the next iteration;
//   - failed: retried up to taskRetryCount consecutive times, then reported as an error;
//   - anything else: the consecutive-failure counter is reset and the loop continues.
//
// Returns an error when the context is canceled (including while waiting between
// iterations), when claude execution fails, when retries are exhausted, or when
// MaxIterations is reached without completion.
func (r *Runner) runTaskPhase(ctx context.Context) error {
        prompt := r.buildTaskPrompt()
        retryCount := 0

        // wait pauses for the iteration delay but returns early on cancellation, so a
        // canceled run does not sit through a full delay before noticing
        wait := func() error {
                select {
                case <-ctx.Done():
                        return fmt.Errorf("task phase: %w", ctx.Err())
                case <-time.After(r.iterationDelay):
                        return nil
                }
        }

        for i := 1; i <= r.cfg.MaxIterations; i++ {
                select {
                case <-ctx.Done():
                        return fmt.Errorf("task phase: %w", ctx.Err())
                default:
                }

                r.log.PrintSection(NewTaskIterationSection(i))

                result := r.claude.Run(ctx, prompt)
                if result.Error != nil {
                        return fmt.Errorf("claude execution: %w", result.Error)
                }

                if result.Signal == SignalCompleted {
                        // verify plan actually has no uncompleted checkboxes
                        if r.hasUncompletedTasks() {
                                r.log.Print("warning: completion signal received but plan still has [ ] items, continuing...")
                                continue
                        }
                        r.log.PrintRaw("\nall tasks completed, starting code review...\n")
                        return nil
                }

                if result.Signal == SignalFailed {
                        if retryCount < r.taskRetryCount {
                                r.log.Print("task failed, retrying...")
                                retryCount++
                                if err := wait(); err != nil {
                                        return err
                                }
                                continue
                        }
                        return errors.New("task execution failed after retry (FAILED signal received)")
                }

                // the iteration made progress, so only consecutive failures count towards the limit
                retryCount = 0
                // continue with same prompt - it reads from plan file each time
                if err := wait(); err != nil {
                        return err
                }
        }

        return fmt.Errorf("max iterations (%d) reached without completion", r.cfg.MaxIterations)
}

// runClaudeReview performs a single Claude review pass with the given prompt.
// An execution error or a FAILED signal aborts with an error; a pass that ends
// without a review-done signal is only logged as a warning and treated as success.
func (r *Runner) runClaudeReview(ctx context.Context, prompt string) error {
        res := r.claude.Run(ctx, prompt)

        switch {
        case res.Error != nil:
                return fmt.Errorf("claude execution: %w", res.Error)
        case res.Signal == SignalFailed:
                return errors.New("review failed (FAILED signal received)")
        case !IsReviewDone(res.Signal):
                // not fatal: later review iterations get another chance to finish cleanly
                r.log.Print("warning: first review pass did not complete cleanly, continuing...")
        }

        return nil
}

// runClaudeReviewLoop runs claude review iterations using second review prompt.
//
// The loop ends successfully as soon as claude reports the review as done. Hitting
// the iteration cap is not an error either: it is logged and the pipeline continues.
// An error is returned when the context is canceled (including while waiting between
// iterations), when claude execution fails, or when a FAILED signal is received.
func (r *Runner) runClaudeReviewLoop(ctx context.Context) error {
        // review iterations = 10% of max_iterations (min 3)
        maxReviewIterations := max(3, r.cfg.MaxIterations/10)

        for i := 1; i <= maxReviewIterations; i++ {
                select {
                case <-ctx.Done():
                        return fmt.Errorf("review: %w", ctx.Err())
                default:
                }

                r.log.PrintSection(NewClaudeReviewSection(i, ": critical/major"))

                // the prompt is rebuilt on every iteration
                result := r.claude.Run(ctx, r.buildSecondReviewPrompt())
                if result.Error != nil {
                        return fmt.Errorf("claude execution: %w", result.Error)
                }

                if result.Signal == SignalFailed {
                        return errors.New("review failed (FAILED signal received)")
                }

                if IsReviewDone(result.Signal) {
                        r.log.Print("claude review complete - no more findings")
                        return nil
                }

                r.log.Print("issues fixed, running another review iteration...")

                // pause before the next iteration, but wake up right away on cancellation
                select {
                case <-ctx.Done():
                        return fmt.Errorf("review: %w", ctx.Err())
                case <-time.After(r.iterationDelay):
                }
        }

        r.log.Print("max claude review iterations reached, continuing...")
        return nil
}

// runCodexLoop runs the codex-claude review loop until no findings.
//
// Each iteration asks codex for a review, shows a short summary of its findings and
// hands the full codex output to claude for evaluation and fixing. Claude's response
// is fed back into the next codex prompt.
//
// Returns nil when codex is disabled, when codex produces no output, when claude
// signals that codex has no more findings, or when the iteration cap is reached.
// Returns an error when the context is canceled (including while waiting between
// iterations) or when either executor fails.
func (r *Runner) runCodexLoop(ctx context.Context) error {
        // skip codex phase if disabled
        if !r.cfg.CodexEnabled {
                r.log.Print("codex review disabled, skipping...")
                return nil
        }

        // codex iterations = 20% of max_iterations (min 3)
        maxCodexIterations := max(3, r.cfg.MaxIterations/5)

        var claudeResponse string // first iteration has no prior response

        for i := 1; i <= maxCodexIterations; i++ {
                select {
                case <-ctx.Done():
                        return fmt.Errorf("codex loop: %w", ctx.Err())
                default:
                }

                r.log.PrintSection(NewCodexIterationSection(i))

                // run codex analysis
                codexResult := r.codex.Run(ctx, r.buildCodexPrompt(i == 1, claudeResponse))
                if codexResult.Error != nil {
                        return fmt.Errorf("codex execution: %w", codexResult.Error)
                }

                if codexResult.Output == "" {
                        // return directly: falling out of the loop would additionally log
                        // "max codex iterations reached", which is not what happened here
                        r.log.Print("codex review returned no output, skipping...")
                        return nil
                }

                // show codex findings summary before Claude evaluation
                r.showCodexSummary(codexResult.Output)

                // pass codex output to claude for evaluation and fixing
                r.log.SetPhase(PhaseClaudeEval)
                r.log.PrintSection(NewClaudeEvalSection())
                claudeResult := r.claude.Run(ctx, r.buildCodexEvaluationPrompt(codexResult.Output))

                // restore codex phase for next iteration (done before the error check so the
                // phase is reset on every path)
                r.log.SetPhase(PhaseCodex)
                if claudeResult.Error != nil {
                        return fmt.Errorf("claude execution: %w", claudeResult.Error)
                }

                claudeResponse = claudeResult.Output

                // exit only when claude sees "no findings" from codex
                if IsCodexDone(claudeResult.Signal) {
                        r.log.Print("codex review complete - no more findings")
                        return nil
                }

                // pause before the next iteration, but wake up right away on cancellation
                select {
                case <-ctx.Done():
                        return fmt.Errorf("codex loop: %w", ctx.Err())
                case <-time.After(r.iterationDelay):
                }
        }

        r.log.Print("max codex iterations reached, continuing to next phase...")
        return nil
}

// buildCodexPrompt assembles the prompt sent to codex.
// The first iteration reviews the whole branch against master, later iterations only
// the uncommitted changes. When the plan file is configured its path is put in front
// as context, and a non-empty claudeResponse is appended as previous-review context.
func (r *Runner) buildCodexPrompt(isFirst bool, claudeResponse string) string {
        // optional header pointing codex at the plan
        var planContext string
        if plan := r.cfg.PlanFile; plan != "" {
                planContext = fmt.Sprintf(`
## Plan Context
The code implements the plan at: %s

---
`, plan)
        }

        // follow-up iterations look at claude's uncommitted fixes; the first one overrides
        // this with the full branch diff
        diffInstruction := "Run: git diff"
        diffDescription := "uncommitted changes (Claude's fixes from previous iteration)"
        if isFirst {
                diffInstruction = "Run: git diff master...HEAD"
                diffDescription = "code changes between master and HEAD branch"
        }

        prompt := fmt.Sprintf(`%sReview the %s.

%s

Analyze for:
- Bugs and logic errors
- Security vulnerabilities
- Race conditions
- Error handling gaps
- Code quality issues

Report findings with file:line references. If no issues found, say "NO ISSUES FOUND".`, planContext, diffDescription, diffInstruction)

        // nothing to add when claude has not responded yet
        if claudeResponse == "" {
                return prompt
        }

        return fmt.Sprintf(`%s

---
PREVIOUS REVIEW CONTEXT:
Claude (previous reviewer) responded to your findings:

%s

Re-evaluate considering Claude's arguments. If Claude's fixes are correct, acknowledge them.
If Claude's arguments are invalid, explain why the issues still exist.`, prompt, claudeResponse)
}

// hasUncompletedTasks reports whether the plan still contains an unchecked "- [ ]" item.
// The plan is read from its configured path, falling back to the same file name inside
// a sibling completed/ directory. If neither can be read the plan counts as incomplete.
func (r *Runner) hasUncompletedTasks() bool {
        planPath := r.cfg.PlanFile

        data, err := os.ReadFile(planPath)
        if err != nil {
                // the plan is not at its original path, look in completed/ next to it
                fallback := filepath.Join(filepath.Dir(planPath), "completed", filepath.Base(planPath))
                data, err = os.ReadFile(fallback) //nolint:gosec // planFile from CLI args
        }
        if err != nil {
                return true // unreadable from both locations, assume work is left
        }

        // an item is open when its line, ignoring surrounding whitespace, starts with "- [ ]"
        for line := range strings.SplitSeq(string(data), "\n") {
                if strings.HasPrefix(strings.TrimSpace(line), "- [ ]") {
                        return true
                }
        }
        return false
}

// showCodexSummary displays a condensed summary of codex output before Claude evaluation.
// extracts text until first code block or 5000 bytes, whichever is shorter.
//
// Truncation never splits a multi-byte UTF-8 character, and a truncated summary is
// marked with a trailing "...". Blank lines are skipped; nothing is printed at all
// when the summary is empty after trimming.
func (r *Runner) showCodexSummary(output string) {
        const maxSummaryBytes = 5000

        summary := output

        // trim to first code block if present; a fence at the very start (idx == 0) is
        // deliberately left alone, otherwise there would be nothing left to show
        if idx := strings.Index(summary, "```"); idx > 0 {
                summary = summary[:idx]
        }

        // limit to maxSummaryBytes
        if len(summary) > maxSummaryBytes {
                cut := maxSummaryBytes
                // step back over UTF-8 continuation bytes (10xxxxxx) so the cut lands on a
                // character boundary instead of in the middle of a multi-byte rune
                for cut > 0 && summary[cut]&0xC0 == 0x80 {
                        cut--
                }
                summary = summary[:cut] + "..."
        }

        summary = strings.TrimSpace(summary)
        if summary == "" {
                return
        }

        r.log.Print("codex findings:")
        for line := range strings.SplitSeq(summary, "\n") {
                if strings.TrimSpace(line) == "" {
                        continue
                }
                r.log.PrintAligned("  " + line)
        }
}

1	// Package processor provides the main orchestration loop for ralphex execution.
2	package processor
3
4	import (
5	"context"
6	"errors"
7	"fmt"
8	"os"
9	"path/filepath"
10	"strings"
11	"time"
12
13	"github.com/umputun/ralphex/pkg/config"
14	"github.com/umputun/ralphex/pkg/executor"
15	)
16
17	// DefaultIterationDelay is the pause between iterations to allow system to settle.
18	const DefaultIterationDelay = 2 * time.Second
19
20	// Mode represents the execution mode.
21	type Mode string
22
23	const (
24	ModeFull Mode = "full" // full execution: tasks + reviews + codex
25	ModeReview Mode = "review" // skip tasks, run full review pipeline
26	ModeCodexOnly Mode = "codex-only" // skip tasks and first review, run only codex loop
27	)
28
29	// Config holds runner configuration.
30	type Config struct {
31	PlanFile string // path to plan file (required for full mode)
32	ProgressPath string // path to progress file
33	Mode Mode // execution mode
34	MaxIterations int // maximum iterations for task phase
35	Debug bool // enable debug output
36	NoColor bool // disable color output
37	IterationDelayMs int // delay between iterations in milliseconds
38	TaskRetryCount int // number of times to retry failed tasks
39	CodexEnabled bool // whether codex review is enabled
40	AppConfig *config.Config // full application config (for executors and prompts)
41	}
42
43	//go:generate moq -out mocks/executor.go -pkg mocks -skip-ensure -fmt goimports . Executor
44	//go:generate moq -out mocks/logger.go -pkg mocks -skip-ensure -fmt goimports . Logger
45
46	// Executor runs CLI commands and returns results.
47	type Executor interface {
48	Run(ctx context.Context, prompt string) executor.Result
49	}
50
51	// Logger provides logging functionality.
52	type Logger interface {
53	SetPhase(phase Phase)
54	Print(format string, args ...any)
55	PrintRaw(format string, args ...any)
56	PrintSection(section Section)
57	PrintAligned(text string)
58	Path() string
59	}
60
61	// Runner orchestrates the execution loop.
62	type Runner struct {
63	cfg Config
64	log Logger
65	claude Executor
66	codex Executor
67	iterationDelay time.Duration
68	taskRetryCount int
69	}
70
71	// New creates a new Runner with the given configuration.
72	func New(cfg Config, log Logger) *Runner {	×
73	// build claude executor with config values	×
74	claudeExec := &executor.ClaudeExecutor{	×
75	OutputHandler: func(text string) {	×
76	log.PrintAligned(text)	×
77	},	×
78	Debug: cfg.Debug,
79	}
80	if cfg.AppConfig != nil {	×
81	claudeExec.Command = cfg.AppConfig.ClaudeCommand	×
82	claudeExec.Args = cfg.AppConfig.ClaudeArgs	×
83	}	×
84
85	// build codex executor with config values
86	codexExec := &executor.CodexExecutor{	×
87	OutputHandler: func(text string) {	×
88	log.PrintAligned(text)	×
89	},	×
90	Debug: cfg.Debug,
91	}
92	if cfg.AppConfig != nil {	×
93	codexExec.Command = cfg.AppConfig.CodexCommand	×
94	codexExec.Model = cfg.AppConfig.CodexModel	×
95	codexExec.ReasoningEffort = cfg.AppConfig.CodexReasoningEffort	×
96	codexExec.TimeoutMs = cfg.AppConfig.CodexTimeoutMs	×
97	codexExec.Sandbox = cfg.AppConfig.CodexSandbox	×
98	}	×
99
100	return NewWithExecutors(cfg, log, claudeExec, codexExec)	×
101	}
102
103	// NewWithExecutors creates a new Runner with custom executors (for testing).
104	func NewWithExecutors(cfg Config, log Logger, claude, codex Executor) *Runner {	22✔
105	// determine iteration delay from config or default	22✔
106	iterDelay := DefaultIterationDelay	22✔
107	if cfg.IterationDelayMs > 0 {	24✔
108	iterDelay = time.Duration(cfg.IterationDelayMs) * time.Millisecond	2✔
109	}	2✔
110
111	// determine task retry count from config
112	// appConfig.TaskRetryCountSet means user explicitly set it (even to 0 for no retries)
113	retryCount := 1	22✔
114	if cfg.AppConfig != nil && cfg.AppConfig.TaskRetryCountSet {	35✔
115	retryCount = cfg.TaskRetryCount	13✔
116	} else if cfg.TaskRetryCount > 0 {	23✔
117	retryCount = cfg.TaskRetryCount	1✔
118	}	1✔
119
120	return &Runner{	22✔
121	cfg: cfg,	22✔
122	log: log,	22✔
123	claude: claude,	22✔
124	codex: codex,	22✔
125	iterationDelay: iterDelay,	22✔
126	taskRetryCount: retryCount,	22✔
127	}	22✔
128	}
129
130	// Run executes the main loop based on configured mode.
131	func (r *Runner) Run(ctx context.Context) error {	15✔
132	switch r.cfg.Mode {	15✔
133	case ModeFull:	8✔
134	return r.runFull(ctx)	8✔
135	case ModeReview:	3✔
136	return r.runReviewOnly(ctx)	3✔
137	case ModeCodexOnly:	3✔
138	return r.runCodexOnly(ctx)	3✔
139	default:	1✔
140	return fmt.Errorf("unknown mode: %s", r.cfg.Mode)	1✔
141	}
142	}
143
144	// runFull executes the complete pipeline: tasks → review → codex → review.
145	func (r *Runner) runFull(ctx context.Context) error {	8✔
146	if r.cfg.PlanFile == "" {	9✔
147	return errors.New("plan file required for full mode")	1✔
148	}	1✔
149
150	// phase 1: task execution
151	r.log.SetPhase(PhaseTask)	7✔
152	r.log.PrintRaw("starting task execution phase\n")	7✔
153		7✔
154	if err := r.runTaskPhase(ctx); err != nil {	12✔
155	return fmt.Errorf("task phase: %w", err)	5✔
156	}	5✔
157
158	// phase 2: first review pass - address ALL findings
159	r.log.SetPhase(PhaseReview)	2✔
160	r.log.PrintSection(NewGenericSection("claude review 0: all findings"))	2✔
161		2✔
162	if err := r.runClaudeReview(ctx, r.buildFirstReviewPrompt()); err != nil {	2✔
163	return fmt.Errorf("first review: %w", err)	×
164	}	×
165
166	// phase 2.1: claude review loop (critical/major) before codex
167	if err := r.runClaudeReviewLoop(ctx); err != nil {	2✔
168	return fmt.Errorf("pre-codex review loop: %w", err)	×
169	}	×
170
171	// phase 2.5: codex external review loop
172	r.log.SetPhase(PhaseCodex)	2✔
173	r.log.PrintSection(NewGenericSection("codex external review"))	2✔
174		2✔
175	if err := r.runCodexLoop(ctx); err != nil {	2✔
176	return fmt.Errorf("codex loop: %w", err)	×
177	}	×
178
179	// phase 3: claude review loop (critical/major) after codex
180	r.log.SetPhase(PhaseReview)	2✔
181		2✔
182	if err := r.runClaudeReviewLoop(ctx); err != nil {	2✔
183	return fmt.Errorf("post-codex review loop: %w", err)	×
184	}	×
185
186	r.log.Print("all phases completed successfully")	2✔
187	return nil	2✔
188	}
189
190	// runReviewOnly executes only the review pipeline: review → codex → review.
191	func (r *Runner) runReviewOnly(ctx context.Context) error {	3✔
192	// phase 1: first review	3✔
193	r.log.SetPhase(PhaseReview)	3✔
194	r.log.PrintSection(NewGenericSection("claude review 0: all findings"))	3✔
195		3✔
196	if err := r.runClaudeReview(ctx, r.buildFirstReviewPrompt()); err != nil {	4✔
197	return fmt.Errorf("first review: %w", err)	1✔
198	}	1✔
199
200	// phase 1.1: claude review loop (critical/major) before codex
201	if err := r.runClaudeReviewLoop(ctx); err != nil {	2✔
202	return fmt.Errorf("pre-codex review loop: %w", err)	×
203	}	×
204
205	// phase 2: codex external review loop
206	r.log.SetPhase(PhaseCodex)	2✔
207	r.log.PrintSection(NewGenericSection("codex external review"))	2✔
208		2✔
209	if err := r.runCodexLoop(ctx); err != nil {	3✔
210	return fmt.Errorf("codex loop: %w", err)	1✔
211	}	1✔
212
213	// phase 3: claude review loop (critical/major) after codex
214	r.log.SetPhase(PhaseReview)	1✔
215		1✔
216	if err := r.runClaudeReviewLoop(ctx); err != nil {	1✔
217	return fmt.Errorf("post-codex review loop: %w", err)	×
218	}	×
219
220	r.log.Print("review phases completed successfully")	1✔
221	return nil	1✔
222	}
223
224	// runCodexOnly executes only the codex pipeline: codex → review.
225	func (r *Runner) runCodexOnly(ctx context.Context) error {	3✔
226	// phase 1: codex external review loop	3✔
227	r.log.SetPhase(PhaseCodex)	3✔
228	r.log.PrintSection(NewGenericSection("codex external review"))	3✔
229		3✔
230	if err := r.runCodexLoop(ctx); err != nil {	3✔
231	return fmt.Errorf("codex loop: %w", err)	×
232	}	×
233
234	// phase 2: claude review loop (critical/major) after codex
235	r.log.SetPhase(PhaseReview)	3✔
236		3✔
237	if err := r.runClaudeReviewLoop(ctx); err != nil {	3✔
238	return fmt.Errorf("post-codex review loop: %w", err)	×
239	}	×
240
241	r.log.Print("codex phases completed successfully")	3✔
242	return nil	3✔
243	}
244
245	// runTaskPhase executes tasks until completion or max iterations.
246	// executes ONE Task section per iteration.
247	func (r *Runner) runTaskPhase(ctx context.Context) error {	7✔
248	prompt := r.buildTaskPrompt()	7✔
249	retryCount := 0	7✔
250		7✔
251	for i := 1; i <= r.cfg.MaxIterations; i++ {	18✔
252	select {	11✔
253	case <-ctx.Done():	1✔
254	return fmt.Errorf("task phase: %w", ctx.Err())	1✔
255	default:	10✔
256	}
257
258	r.log.PrintSection(NewTaskIterationSection(i))	10✔
259		10✔
260	result := r.claude.Run(ctx, prompt)	10✔
261	if result.Error != nil {	11✔
262	return fmt.Errorf("claude execution: %w", result.Error)	1✔
263	}	1✔
264
265	if result.Signal == SignalCompleted {	11✔
266	// verify plan actually has no uncompleted checkboxes	2✔
267	if r.hasUncompletedTasks() {	2✔
268	r.log.Print("warning: completion signal received but plan still has [ ] items, continuing...")	×
269	continue	×
270	}
271	r.log.PrintRaw("\nall tasks completed, starting code review...\n")	2✔
272	return nil	2✔
273	}
274
275	if result.Signal == SignalFailed {	11✔
276	if retryCount < r.taskRetryCount {	6✔
277	r.log.Print("task failed, retrying...")	2✔
278	retryCount++	2✔
279	time.Sleep(r.iterationDelay)	2✔
280	continue	2✔
281	}
282	return errors.New("task execution failed after retry (FAILED signal received)")	2✔
283	}
284
285	retryCount = 0	3✔
286	// continue with same prompt - it reads from plan file each time	3✔
287	time.Sleep(r.iterationDelay)	3✔
288	}
289
290	return fmt.Errorf("max iterations (%d) reached without completion", r.cfg.MaxIterations)	1✔
291	}
292
293	// runClaudeReview runs Claude review with the given prompt until REVIEW_DONE.
294	func (r *Runner) runClaudeReview(ctx context.Context, prompt string) error {	5✔
295	result := r.claude.Run(ctx, prompt)	5✔
296	if result.Error != nil {	5✔
297	return fmt.Errorf("claude execution: %w", result.Error)	×
298	}	×
299
300	if result.Signal == SignalFailed {	6✔
301	return errors.New("review failed (FAILED signal received)")	1✔
302	}	1✔
303
304	if !IsReviewDone(result.Signal) {	4✔
305	r.log.Print("warning: first review pass did not complete cleanly, continuing...")	×
306	}	×
307
308	return nil	4✔
309	}
310
311	// runClaudeReviewLoop runs claude review iterations using second review prompt.
312	func (r *Runner) runClaudeReviewLoop(ctx context.Context) error {	10✔
313	// review iterations = 10% of max_iterations (min 3)	10✔
314	maxReviewIterations := max(3, r.cfg.MaxIterations/10)	10✔
315		10✔
316	for i := 1; i <= maxReviewIterations; i++ {	20✔
317	select {	10✔
318	case <-ctx.Done():	×
319	return fmt.Errorf("review: %w", ctx.Err())	×
320	default:	10✔
321	}
322
323	r.log.PrintSection(NewClaudeReviewSection(i, ": critical/major"))	10✔
324		10✔
325	result := r.claude.Run(ctx, r.buildSecondReviewPrompt())	10✔
326	if result.Error != nil {	10✔
327	return fmt.Errorf("claude execution: %w", result.Error)	×
328	}	×
329
330	if result.Signal == SignalFailed {	10✔
331	return errors.New("review failed (FAILED signal received)")	×
332	}	×
333
334	if IsReviewDone(result.Signal) {	20✔
335	r.log.Print("claude review complete - no more findings")	10✔
336	return nil	10✔
337	}	10✔
338
339	r.log.Print("issues fixed, running another review iteration...")	×
340	time.Sleep(r.iterationDelay)	×
341	}
342
343	r.log.Print("max claude review iterations reached, continuing...")	×
344	return nil	×
345	}
346
347	// runCodexLoop runs the codex-claude review loop until no findings.
348	func (r *Runner) runCodexLoop(ctx context.Context) error {	7✔
349	// skip codex phase if disabled	7✔
350	if !r.cfg.CodexEnabled {	8✔
351	r.log.Print("codex review disabled, skipping...")	1✔
352	return nil	1✔
353	}	1✔
354
355	// codex iterations = 20% of max_iterations (min 3)
356	maxCodexIterations := max(3, r.cfg.MaxIterations/5)	6✔
357		6✔
358	var claudeResponse string // first iteration has no prior response	6✔
359		6✔
360	for i := 1; i <= maxCodexIterations; i++ {	12✔
361	select {	6✔
362	case <-ctx.Done():	×
363	return fmt.Errorf("codex loop: %w", ctx.Err())	×
364	default:	6✔
365	}
366
367	r.log.PrintSection(NewCodexIterationSection(i))	6✔
368		6✔
369	// run codex analysis	6✔
370	codexResult := r.codex.Run(ctx, r.buildCodexPrompt(i == 1, claudeResponse))	6✔
371	if codexResult.Error != nil {	7✔
372	return fmt.Errorf("codex execution: %w", codexResult.Error)	1✔
373	}	1✔
374
375	if codexResult.Output == "" {	7✔
376	r.log.Print("codex review returned no output, skipping...")	2✔
377	break	2✔
378	}
379
380	// show codex findings summary before Claude evaluation
381	r.showCodexSummary(codexResult.Output)	3✔
382		3✔
383	// pass codex output to claude for evaluation and fixing	3✔
384	r.log.SetPhase(PhaseClaudeEval)	3✔
385	r.log.PrintSection(NewClaudeEvalSection())	3✔
386	claudeResult := r.claude.Run(ctx, r.buildCodexEvaluationPrompt(codexResult.Output))	3✔
387		3✔
388	// restore codex phase for next iteration	3✔
389	r.log.SetPhase(PhaseCodex)	3✔
390	if claudeResult.Error != nil {	3✔
391	return fmt.Errorf("claude execution: %w", claudeResult.Error)	×
392	}	×
393
394	claudeResponse = claudeResult.Output	3✔
395		3✔
396	// exit only when claude sees "no findings" from codex	3✔
397	if IsCodexDone(claudeResult.Signal) {	6✔
398	r.log.Print("codex review complete - no more findings")	3✔
399	return nil	3✔
400	}	3✔
401
402	time.Sleep(r.iterationDelay)	×
403	}
404
405	r.log.Print("max codex iterations reached, continuing to next phase...")	2✔
406	return nil	2✔
407	}
408
409	// buildCodexPrompt creates the prompt for codex review.
410	func (r *Runner) buildCodexPrompt(isFirst bool, claudeResponse string) string {	6✔
411	// build plan context if available	6✔
412	planContext := ""	6✔
413	if r.cfg.PlanFile != "" {	8✔
414	planContext = fmt.Sprintf(`	2✔
415	## Plan Context	2✔
416	The code implements the plan at: %s	2✔
417		2✔
418	---	2✔
419	`, r.cfg.PlanFile)	2✔
420	}	2✔
421
422	// different diff command based on iteration
423	var diffInstruction, diffDescription string	6✔
424	if isFirst {	12✔
425	diffInstruction = "Run: git diff master...HEAD"	6✔
426	diffDescription = "code changes between master and HEAD branch"	6✔
427	} else {	6✔
428	diffInstruction = "Run: git diff"	×
429	diffDescription = "uncommitted changes (Claude's fixes from previous iteration)"	×
430	}	×
431
432	basePrompt := fmt.Sprintf(`%sReview the %s.	6✔
433		6✔
434	%s	6✔
435		6✔
436	Analyze for:	6✔
437	- Bugs and logic errors	6✔
438	- Security vulnerabilities	6✔
439	- Race conditions	6✔
440	- Error handling gaps	6✔
441	- Code quality issues	6✔
442		6✔
443	Report findings with file:line references. If no issues found, say "NO ISSUES FOUND".`, planContext, diffDescription, diffInstruction)	6✔
444		6✔
445	if claudeResponse != "" {	6✔
446	return fmt.Sprintf(`%s	×
447		×
448	---	×
449	PREVIOUS REVIEW CONTEXT:	×
450	Claude (previous reviewer) responded to your findings:	×
451		×
452	%s	×
453		×
454	Re-evaluate considering Claude's arguments. If Claude's fixes are correct, acknowledge them.	×
455	If Claude's arguments are invalid, explain why the issues still exist.`, basePrompt, claudeResponse)	×
456	}	×
457
458	return basePrompt	6✔
459	}
460
461	// hasUncompletedTasks checks if plan file has any uncompleted checkboxes.
462	// Checks both original path and completed/ subdirectory.
463	func (r *Runner) hasUncompletedTasks() bool {	6✔
464	// try original path first	6✔
465	content, err := os.ReadFile(r.cfg.PlanFile)	6✔
466	if err != nil {	6✔
UNCOV 467	// try completed/ subdirectory as fallback	×
UNCOV 468	completedPath := filepath.Join(filepath.Dir(r.cfg.PlanFile), "completed", filepath.Base(r.cfg.PlanFile))	×
UNCOV 469	content, err = os.ReadFile(completedPath) //nolint:gosec // planFile from CLI args	×
UNCOV 470	if err != nil {	×
UNCOV 471	return true // assume incomplete if can't read from either location	×
UNCOV 472	}	×
473	}
474
475	// look for uncompleted checkbox pattern: [ ] (not [x])
476	for line := range strings.SplitSeq(string(content), "\n") {	21✔
477	trimmed := strings.TrimSpace(line)	15✔
478	if strings.HasPrefix(trimmed, "- [ ]") {	17✔
479	return true	2✔
480	}	2✔
481	}
482	return false	4✔
483	}
484
485	// showCodexSummary displays a condensed summary of codex output before Claude evaluation.
486	// extracts text until first code block or 500 chars, whichever is shorter.
487	func (r *Runner) showCodexSummary(output string) {	3✔
488	summary := output	3✔
489		3✔
490	// trim to first code block if present	3✔
491	if idx := strings.Index(summary, "```"); idx > 0 {	3✔
492	summary = summary[:idx]	×
493	}	×
494
495	// limit to 5000 chars
496	if len(summary) > 5000 {	3✔
497	summary = summary[:5000] + "..."	×
498	}	×
499
500	summary = strings.TrimSpace(summary)	3✔
501	if summary == "" {	3✔
502	return	×
503	}	×
504
505	r.log.Print("codex findings:")	3✔
506	for line := range strings.SplitSeq(summary, "\n") {	6✔
507	if strings.TrimSpace(line) == "" {	3✔
508	continue	×
509	}
510	r.log.PrintAligned(" " + line)	3✔
511	}
512	}

umputun / ralphex / 21302158116

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous