21305559204

Committed 24 Jan 2026 12:16AM UTC coverage: 79.365% (-0.03%) from 79.395%

Build # 21305559204

Build Type

Pull #17

github

Committed by

web-flow

Commit Message

Merge branch 'master' into web-ui

Pull Request #17: feat: add web dashboard with real-time streaming and multi-session support

Run Details

1478 of 1874 new or added lines in 19 files covered. (78.87%)

9 existing lines in 2 files now uncovered.

3077 of 3877 relevant lines covered (79.37%)

224.54 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

73.21

/pkg/processor/runner.go

// Package processor provides the main orchestration loop for ralphex execution.
package processor

import (
        "context"
        "errors"
        "fmt"
        "os"
        "path/filepath"
        "strings"
        "time"

        "github.com/umputun/ralphex/pkg/config"
        "github.com/umputun/ralphex/pkg/executor"
)

// DefaultIterationDelay is the pause between iterations to allow system to settle.
// It applies whenever Config.IterationDelayMs is zero or negative.
const DefaultIterationDelay = 2 * time.Second

// Mode represents the execution mode.
// Runner.Run dispatches on this value and returns an "unknown mode" error for
// anything other than the constants declared below.
type Mode string

// Supported execution modes.
const (
        ModeFull      Mode = "full"       // full execution: tasks + reviews + codex
        ModeReview    Mode = "review"     // skip tasks, run full review pipeline
        ModeCodexOnly Mode = "codex-only" // skip tasks and first review, run only codex loop
)

// Config holds runner configuration.
//
// How the runner consumes selected fields:
//   - PlanFile must be non-empty in full mode; when set it is also mentioned
//     in the codex prompt as plan context.
//   - MaxIterations bounds the task phase and sizes the review loops: the
//     claude review loop runs at most max(3, MaxIterations/10) iterations and
//     the codex loop at most max(3, MaxIterations/5).
//   - IterationDelayMs values <= 0 select DefaultIterationDelay.
//   - TaskRetryCount is taken as-is (including 0) when
//     AppConfig.TaskRetryCountSet is true; otherwise only a positive value
//     overrides the default of one retry.
//   - CodexEnabled false makes the codex loop a logged no-op.
type Config struct {
        PlanFile         string         // path to plan file (required for full mode)
        ProgressPath     string         // path to progress file
        Mode             Mode           // execution mode
        MaxIterations    int            // maximum iterations for task phase
        Debug            bool           // enable debug output
        NoColor          bool           // disable color output
        IterationDelayMs int            // delay between iterations in milliseconds
        TaskRetryCount   int            // number of times to retry failed tasks
        CodexEnabled     bool           // whether codex review is enabled
        AppConfig        *config.Config // full application config (for executors and prompts)
}

//go:generate moq -out mocks/executor.go -pkg mocks -skip-ensure -fmt goimports . Executor
//go:generate moq -out mocks/logger.go -pkg mocks -skip-ensure -fmt goimports . Logger

// Executor runs CLI commands and returns results.
// The runner inspects the returned result's Error, Signal and Output fields to
// decide whether to stop, retry or continue with the next iteration.
type Executor interface {
        // Run executes a single invocation with the given prompt.
        Run(ctx context.Context, prompt string) executor.Result
}

// Logger provides logging functionality.
// The runner calls SetPhase on every phase transition, PrintSection before each
// iteration or stage, and PrintAligned for executor output and codex summary lines.
type Logger interface {
        SetPhase(phase Phase)                // marks the phase subsequent output belongs to
        Print(format string, args ...any)    // formatted status message
        PrintRaw(format string, args ...any) // formatted message; NOTE(review): presumably emitted without decoration — confirm against implementation
        PrintSection(section Section)        // section header
        PrintAligned(text string)            // free-form text such as executor output
        Path() string                        // NOTE(review): presumably the log/progress file path — not used in this file, confirm
}

// Runner orchestrates the execution loop.
// Construct it with New or NewWithExecutors so that iterationDelay and
// taskRetryCount are resolved from Config.
type Runner struct {
        cfg            Config        // configuration as passed to the constructor
        log            Logger        // sink for all progress output
        claude         Executor      // runs task, review and codex-evaluation prompts
        codex          Executor      // runs the external codex review prompt
        iterationDelay time.Duration // pause between loop iterations
        taskRetryCount int           // max consecutive retries after a FAILED task signal
}

// New creates a new Runner with the given configuration.
// It builds the claude and codex executors, forwarding their output to
// log.PrintAligned. Commands and tuning options are copied from cfg.AppConfig
// when it is set; otherwise those executor fields keep their zero values.
func New(cfg Config, log Logger) *Runner {
        // both executors stream their output through the same logger sink
        forward := func(text string) {
                log.PrintAligned(text)
        }

        claudeExec := &executor.ClaudeExecutor{OutputHandler: forward, Debug: cfg.Debug}
        codexExec := &executor.CodexExecutor{OutputHandler: forward, Debug: cfg.Debug}

        // apply executor settings from the application config, if any
        if app := cfg.AppConfig; app != nil {
                claudeExec.Command = app.ClaudeCommand
                claudeExec.Args = app.ClaudeArgs

                codexExec.Command = app.CodexCommand
                codexExec.Model = app.CodexModel
                codexExec.ReasoningEffort = app.CodexReasoningEffort
                codexExec.TimeoutMs = app.CodexTimeoutMs
                codexExec.Sandbox = app.CodexSandbox
        }

        return NewWithExecutors(cfg, log, claudeExec, codexExec)
}

// NewWithExecutors creates a new Runner with custom executors (for testing).
// The iteration delay defaults to DefaultIterationDelay unless
// cfg.IterationDelayMs is positive. The retry count defaults to 1 and is
// replaced by cfg.TaskRetryCount when that value is positive or when the
// application config marks it as explicitly set (which allows 0 = no retries).
func NewWithExecutors(cfg Config, log Logger, claude, codex Executor) *Runner {
        runner := &Runner{
                cfg:            cfg,
                log:            log,
                claude:         claude,
                codex:          codex,
                iterationDelay: DefaultIterationDelay,
                taskRetryCount: 1,
        }

        if cfg.IterationDelayMs > 0 {
                runner.iterationDelay = time.Duration(cfg.IterationDelayMs) * time.Millisecond
        }

        // an explicitly configured value wins even when it is zero
        explicitlySet := cfg.AppConfig != nil && cfg.AppConfig.TaskRetryCountSet
        if explicitlySet || cfg.TaskRetryCount > 0 {
                runner.taskRetryCount = cfg.TaskRetryCount
        }

        return runner
}

// Run executes the main loop based on configured mode.
// It returns the error of the selected pipeline, or an "unknown mode" error
// when the configured mode is not one of the supported constants.
func (r *Runner) Run(ctx context.Context) error {
        mode := r.cfg.Mode

        if mode == ModeFull {
                return r.runFull(ctx)
        }
        if mode == ModeReview {
                return r.runReviewOnly(ctx)
        }
        if mode == ModeCodexOnly {
                return r.runCodexOnly(ctx)
        }

        return fmt.Errorf("unknown mode: %s", mode)
}

// runFull executes the complete pipeline: tasks → review → codex → review.
// It requires a plan file and stops at the first failing stage, wrapping the
// error with the name of that stage.
func (r *Runner) runFull(ctx context.Context) error {
        if r.cfg.PlanFile == "" {
                return errors.New("plan file required for full mode")
        }

        // stage 1: work through the plan tasks
        r.log.SetPhase(PhaseTask)
        r.log.PrintRaw("starting task execution phase\n")
        err := r.runTaskPhase(ctx)
        if err != nil {
                return fmt.Errorf("task phase: %w", err)
        }

        // stage 2: initial review pass that addresses every finding
        r.log.SetPhase(PhaseReview)
        r.log.PrintSection(NewGenericSection("claude review 0: all findings"))
        err = r.runClaudeReview(ctx, r.buildFirstReviewPrompt())
        if err != nil {
                return fmt.Errorf("first review: %w", err)
        }

        // stage 2.1: critical/major review iterations ahead of codex
        err = r.runClaudeReviewLoop(ctx)
        if err != nil {
                return fmt.Errorf("pre-codex review loop: %w", err)
        }

        // stage 2.5: external review by codex
        r.log.SetPhase(PhaseCodex)
        r.log.PrintSection(NewGenericSection("codex external review"))
        err = r.runCodexLoop(ctx)
        if err != nil {
                return fmt.Errorf("codex loop: %w", err)
        }

        // stage 3: critical/major review iterations once codex is done
        r.log.SetPhase(PhaseReview)
        err = r.runClaudeReviewLoop(ctx)
        if err != nil {
                return fmt.Errorf("post-codex review loop: %w", err)
        }

        r.log.Print("all phases completed successfully")
        return nil
}

// runReviewOnly executes only the review pipeline: review → codex → review.
// It stops at the first failing stage, wrapping the error with the name of
// that stage.
func (r *Runner) runReviewOnly(ctx context.Context) error {
        // stage 1: initial review pass
        r.log.SetPhase(PhaseReview)
        r.log.PrintSection(NewGenericSection("claude review 0: all findings"))
        err := r.runClaudeReview(ctx, r.buildFirstReviewPrompt())
        if err != nil {
                return fmt.Errorf("first review: %w", err)
        }

        // stage 1.1: critical/major review iterations ahead of codex
        err = r.runClaudeReviewLoop(ctx)
        if err != nil {
                return fmt.Errorf("pre-codex review loop: %w", err)
        }

        // stage 2: external review by codex
        r.log.SetPhase(PhaseCodex)
        r.log.PrintSection(NewGenericSection("codex external review"))
        err = r.runCodexLoop(ctx)
        if err != nil {
                return fmt.Errorf("codex loop: %w", err)
        }

        // stage 3: critical/major review iterations once codex is done
        r.log.SetPhase(PhaseReview)
        err = r.runClaudeReviewLoop(ctx)
        if err != nil {
                return fmt.Errorf("post-codex review loop: %w", err)
        }

        r.log.Print("review phases completed successfully")
        return nil
}

// runCodexOnly executes only the codex pipeline: codex → review.
// It stops at the first failing stage, wrapping the error with the name of
// that stage.
func (r *Runner) runCodexOnly(ctx context.Context) error {
        // stage 1: external review by codex
        r.log.SetPhase(PhaseCodex)
        r.log.PrintSection(NewGenericSection("codex external review"))
        err := r.runCodexLoop(ctx)
        if err != nil {
                return fmt.Errorf("codex loop: %w", err)
        }

        // stage 2: critical/major review iterations once codex is done
        r.log.SetPhase(PhaseReview)
        err = r.runClaudeReviewLoop(ctx)
        if err != nil {
                return fmt.Errorf("post-codex review loop: %w", err)
        }

        r.log.Print("codex phases completed successfully")
        return nil
}

// runTaskPhase executes tasks until completion or max iterations.
// executes ONE Task section per iteration.
//
// Each iteration sends the same task prompt to claude and reacts to the signal:
//   - completed: finishes the phase, unless the plan file still has unchecked
//     items, in which case the next iteration starts immediately;
//   - failed: retried up to taskRetryCount consecutive times, then an error;
//   - anything else: resets the retry counter and moves to the next iteration.
//
// It returns an error on context cancellation, on an executor error, when the
// retries are exhausted, or when MaxIterations is reached without completion.
// The pause between iterations is interrupted by context cancellation.
func (r *Runner) runTaskPhase(ctx context.Context) error {
        prompt := r.buildTaskPrompt()
        retryCount := 0

        for i := 1; i <= r.cfg.MaxIterations; i++ {
                if err := ctx.Err(); err != nil {
                        return fmt.Errorf("task phase: %w", err)
                }

                r.log.PrintSection(NewTaskIterationSection(i))

                result := r.claude.Run(ctx, prompt)
                if result.Error != nil {
                        return fmt.Errorf("claude execution: %w", result.Error)
                }

                switch result.Signal {
                case SignalCompleted:
                        // verify plan actually has no uncompleted checkboxes
                        if r.hasUncompletedTasks() {
                                r.log.Print("warning: completion signal received but plan still has [ ] items, continuing...")
                                continue
                        }
                        r.log.PrintRaw("\nall tasks completed, starting code review...\n")
                        return nil
                case SignalFailed:
                        if retryCount >= r.taskRetryCount {
                                return errors.New("task execution failed after retry (FAILED signal received)")
                        }
                        r.log.Print("task failed, retrying...")
                        retryCount++
                default:
                        // progress was made, so the next failure gets a fresh retry budget
                        retryCount = 0
                }

                // continue with same prompt - it reads from plan file each time.
                // wait for the system to settle, but give up at once on cancellation
                select {
                case <-ctx.Done():
                        return fmt.Errorf("task phase: %w", ctx.Err())
                case <-time.After(r.iterationDelay):
                }
        }

        return fmt.Errorf("max iterations (%d) reached without completion", r.cfg.MaxIterations)
}

// runClaudeReview runs a single Claude review pass with the given prompt.
// An executor error or a FAILED signal is returned as an error; a pass that
// ends without a review-done signal is only logged as a warning.
func (r *Runner) runClaudeReview(ctx context.Context, prompt string) error {
        result := r.claude.Run(ctx, prompt)

        switch {
        case result.Error != nil:
                return fmt.Errorf("claude execution: %w", result.Error)
        case result.Signal == SignalFailed:
                return errors.New("review failed (FAILED signal received)")
        case !IsReviewDone(result.Signal):
                r.log.Print("warning: first review pass did not complete cleanly, continuing...")
        }

        return nil
}

// runClaudeReviewLoop runs claude review iterations using second review prompt.
//
// It runs at most max(3, MaxIterations/10) iterations and stops early once
// claude signals that the review is done. Reaching the iteration cap is not an
// error: it is logged and nil is returned. An error is returned on context
// cancellation, on an executor error, or when claude emits the FAILED signal.
// The pause between iterations is interrupted by context cancellation.
func (r *Runner) runClaudeReviewLoop(ctx context.Context) error {
        // review iterations = 10% of max_iterations (min 3)
        maxReviewIterations := max(3, r.cfg.MaxIterations/10)

        for i := 1; i <= maxReviewIterations; i++ {
                if err := ctx.Err(); err != nil {
                        return fmt.Errorf("review: %w", err)
                }

                r.log.PrintSection(NewClaudeReviewSection(i, ": critical/major"))

                result := r.claude.Run(ctx, r.buildSecondReviewPrompt())
                if result.Error != nil {
                        return fmt.Errorf("claude execution: %w", result.Error)
                }

                if result.Signal == SignalFailed {
                        return errors.New("review failed (FAILED signal received)")
                }

                if IsReviewDone(result.Signal) {
                        r.log.Print("claude review complete - no more findings")
                        return nil
                }

                r.log.Print("issues fixed, running another review iteration...")

                // wait for the system to settle, but give up at once on cancellation
                select {
                case <-ctx.Done():
                        return fmt.Errorf("review: %w", ctx.Err())
                case <-time.After(r.iterationDelay):
                }
        }

        r.log.Print("max claude review iterations reached, continuing...")
        return nil
}

// runCodexLoop runs the codex-claude review loop until no findings.
//
// The loop is skipped (nil returned) when codex is disabled. Otherwise it runs
// at most max(3, MaxIterations/5) iterations. Each iteration asks codex for a
// review, shows a summary of its output, and hands that output to claude for
// evaluation; claude's response is passed into the next codex prompt.
// The loop ends when claude reports that codex has no more findings, when
// codex returns no output, or when the iteration cap is reached; none of these
// is an error. An error is returned on context cancellation or when either
// executor fails. The pause between iterations is interrupted by cancellation.
func (r *Runner) runCodexLoop(ctx context.Context) error {
        // skip codex phase if disabled
        if !r.cfg.CodexEnabled {
                r.log.Print("codex review disabled, skipping...")
                return nil
        }

        // codex iterations = 20% of max_iterations (min 3)
        maxCodexIterations := max(3, r.cfg.MaxIterations/5)

        var claudeResponse string // first iteration has no prior response

        for i := 1; i <= maxCodexIterations; i++ {
                if err := ctx.Err(); err != nil {
                        return fmt.Errorf("codex loop: %w", err)
                }

                r.log.PrintSection(NewCodexIterationSection(i))

                // run codex analysis
                codexResult := r.codex.Run(ctx, r.buildCodexPrompt(i == 1, claudeResponse))
                if codexResult.Error != nil {
                        return fmt.Errorf("codex execution: %w", codexResult.Error)
                }

                if codexResult.Output == "" {
                        // return directly: falling out of the loop would also log the
                        // "max codex iterations reached" message, which is not what happened
                        r.log.Print("codex review returned no output, skipping...")
                        return nil
                }

                // show codex findings summary before Claude evaluation
                r.showCodexSummary(codexResult.Output)

                // pass codex output to claude for evaluation and fixing
                r.log.SetPhase(PhaseClaudeEval)
                r.log.PrintSection(NewClaudeEvalSection())
                claudeResult := r.claude.Run(ctx, r.buildCodexEvaluationPrompt(codexResult.Output))

                // restore codex phase for next iteration
                r.log.SetPhase(PhaseCodex)
                if claudeResult.Error != nil {
                        return fmt.Errorf("claude execution: %w", claudeResult.Error)
                }

                claudeResponse = claudeResult.Output

                // exit only when claude sees "no findings" from codex
                if IsCodexDone(claudeResult.Signal) {
                        r.log.Print("codex review complete - no more findings")
                        return nil
                }

                // wait for the system to settle, but give up at once on cancellation
                select {
                case <-ctx.Done():
                        return fmt.Errorf("codex loop: %w", ctx.Err())
                case <-time.After(r.iterationDelay):
                }
        }

        r.log.Print("max codex iterations reached, continuing to next phase...")
        return nil
}

// buildCodexPrompt creates the prompt for codex review.
// The first iteration reviews the branch diff against master; later iterations
// review the uncommitted changes. When a plan file is configured, its path is
// prepended as context, and a non-empty claudeResponse is appended so codex can
// re-evaluate its earlier findings.
func (r *Runner) buildCodexPrompt(isFirst bool, claudeResponse string) string {
        // optional header pointing codex at the plan
        var planSection string
        if r.cfg.PlanFile != "" {
                planSection = "\n## Plan Context\nThe code implements the plan at: " + r.cfg.PlanFile + "\n\n---\n"
        }

        // follow-up iterations look only at what changed since the last round
        diffCmd := "Run: git diff"
        diffScope := "uncommitted changes (Claude's fixes from previous iteration)"
        if isFirst {
                diffCmd = "Run: git diff master...HEAD"
                diffScope = "code changes between master and HEAD branch"
        }

        review := fmt.Sprintf(`%sReview the %s.

%s

Analyze for:
- Bugs and logic errors
- Security vulnerabilities
- Race conditions
- Error handling gaps
- Code quality issues

Report findings with file:line references. If no issues found, say "NO ISSUES FOUND".`, planSection, diffScope, diffCmd)

        if claudeResponse == "" {
                return review
        }

        return fmt.Sprintf(`%s

---
PREVIOUS REVIEW CONTEXT:
Claude (previous reviewer) responded to your findings:

%s

Re-evaluate considering Claude's arguments. If Claude's fixes are correct, acknowledge them.
If Claude's arguments are invalid, explain why the issues still exist.`, review, claudeResponse)
}

// hasUncompletedTasks checks if plan file has any uncompleted checkboxes.
// The plan is read from its configured path, falling back to the same file
// name inside a sibling completed/ directory. When neither location can be
// read the plan is treated as incomplete.
func (r *Runner) hasUncompletedTasks() bool {
        data, readErr := os.ReadFile(r.cfg.PlanFile)
        if readErr != nil {
                // the plan may already have been moved to completed/
                dir, name := filepath.Dir(r.cfg.PlanFile), filepath.Base(r.cfg.PlanFile)
                data, readErr = os.ReadFile(filepath.Join(dir, "completed", name)) //nolint:gosec // planFile from CLI args
        }
        if readErr != nil {
                return true // unreadable in both locations, assume work remains
        }

        // an open item is a line starting (after indentation) with "- [ ]"
        for entry := range strings.SplitSeq(string(data), "\n") {
                if strings.HasPrefix(strings.TrimSpace(entry), "- [ ]") {
                        return true
                }
        }
        return false
}

// showCodexSummary displays a condensed summary of codex output before Claude evaluation.
// The summary is the text preceding the first ``` code fence, capped at 5000
// bytes (cut on a UTF-8 character boundary and suffixed with "..."). Output
// that begins with a code fence is not trimmed at the fence. Nothing is
// printed when the summary is blank; otherwise each non-blank line is printed
// indented by two spaces under a "codex findings:" header.
func (r *Runner) showCodexSummary(output string) {
        const maxSummaryBytes = 5000

        summary := output

        // trim to first code block if present (idx > 0: keep output that starts with a fence)
        if idx := strings.Index(summary, "```"); idx > 0 {
                summary = summary[:idx]
        }

        // limit the size of the summary
        if len(summary) > maxSummaryBytes {
                cut := maxSummaryBytes
                // step back over UTF-8 continuation bytes (10xxxxxx) so a multi-byte
                // character is never split; a character has at most 3 of them
                for back := 0; back < 3 && cut > 0 && summary[cut]&0xC0 == 0x80; back++ {
                        cut--
                }
                summary = summary[:cut] + "..."
        }

        summary = strings.TrimSpace(summary)
        if summary == "" {
                return
        }

        r.log.Print("codex findings:")
        for line := range strings.SplitSeq(summary, "\n") {
                if strings.TrimSpace(line) == "" {
                        continue
                }
                r.log.PrintAligned("  " + line)
        }
}

1	// Package processor provides the main orchestration loop for ralphex execution.
2	package processor
3
4	import (
5	"context"
6	"errors"
7	"fmt"
8	"os"
9	"path/filepath"
10	"strings"
11	"time"
12
13	"github.com/umputun/ralphex/pkg/config"
14	"github.com/umputun/ralphex/pkg/executor"
15	)
16
17	// DefaultIterationDelay is the pause between iterations to allow system to settle.
18	const DefaultIterationDelay = 2 * time.Second
19
20	// Mode represents the execution mode.
21	type Mode string
22
23	const (
24	ModeFull Mode = "full" // full execution: tasks + reviews + codex
25	ModeReview Mode = "review" // skip tasks, run full review pipeline
26	ModeCodexOnly Mode = "codex-only" // skip tasks and first review, run only codex loop
27	)
28
29	// Config holds runner configuration.
30	type Config struct {
31	PlanFile string // path to plan file (required for full mode)
32	ProgressPath string // path to progress file
33	Mode Mode // execution mode
34	MaxIterations int // maximum iterations for task phase
35	Debug bool // enable debug output
36	NoColor bool // disable color output
37	IterationDelayMs int // delay between iterations in milliseconds
38	TaskRetryCount int // number of times to retry failed tasks
39	CodexEnabled bool // whether codex review is enabled
40	AppConfig *config.Config // full application config (for executors and prompts)
41	}
42
43	//go:generate moq -out mocks/executor.go -pkg mocks -skip-ensure -fmt goimports . Executor
44	//go:generate moq -out mocks/logger.go -pkg mocks -skip-ensure -fmt goimports . Logger
45
46	// Executor runs CLI commands and returns results.
47	type Executor interface {
48	Run(ctx context.Context, prompt string) executor.Result
49	}
50
51	// Logger provides logging functionality.
52	type Logger interface {
53	SetPhase(phase Phase)
54	Print(format string, args ...any)
55	PrintRaw(format string, args ...any)
56	PrintSection(section Section)
57	PrintAligned(text string)
58	Path() string
59	}
60
61	// Runner orchestrates the execution loop.
62	type Runner struct {
63	cfg Config
64	log Logger
65	claude Executor
66	codex Executor
67	iterationDelay time.Duration
68	taskRetryCount int
69	}
70
71	// New creates a new Runner with the given configuration.
72	func New(cfg Config, log Logger) *Runner {	×
73	// build claude executor with config values	×
74	claudeExec := &executor.ClaudeExecutor{	×
75	OutputHandler: func(text string) {	×
76	log.PrintAligned(text)	×
77	},	×
78	Debug: cfg.Debug,
79	}
80	if cfg.AppConfig != nil {	×
81	claudeExec.Command = cfg.AppConfig.ClaudeCommand	×
82	claudeExec.Args = cfg.AppConfig.ClaudeArgs	×
83	}	×
84
85	// build codex executor with config values
86	codexExec := &executor.CodexExecutor{	×
87	OutputHandler: func(text string) {	×
88	log.PrintAligned(text)	×
89	},	×
90	Debug: cfg.Debug,
91	}
92	if cfg.AppConfig != nil {	×
93	codexExec.Command = cfg.AppConfig.CodexCommand	×
94	codexExec.Model = cfg.AppConfig.CodexModel	×
95	codexExec.ReasoningEffort = cfg.AppConfig.CodexReasoningEffort	×
96	codexExec.TimeoutMs = cfg.AppConfig.CodexTimeoutMs	×
97	codexExec.Sandbox = cfg.AppConfig.CodexSandbox	×
98	}	×
99
100	return NewWithExecutors(cfg, log, claudeExec, codexExec)	×
101	}
102
103	// NewWithExecutors creates a new Runner with custom executors (for testing).
104	func NewWithExecutors(cfg Config, log Logger, claude, codex Executor) *Runner {	22✔
105	// determine iteration delay from config or default	22✔
106	iterDelay := DefaultIterationDelay	22✔
107	if cfg.IterationDelayMs > 0 {	24✔
108	iterDelay = time.Duration(cfg.IterationDelayMs) * time.Millisecond	2✔
109	}	2✔
110
111	// determine task retry count from config
112	// appConfig.TaskRetryCountSet means user explicitly set it (even to 0 for no retries)
113	retryCount := 1	22✔
114	if cfg.AppConfig != nil && cfg.AppConfig.TaskRetryCountSet {	35✔
115	retryCount = cfg.TaskRetryCount	13✔
116	} else if cfg.TaskRetryCount > 0 {	23✔
117	retryCount = cfg.TaskRetryCount	1✔
118	}	1✔
119
120	return &Runner{	22✔
121	cfg: cfg,	22✔
122	log: log,	22✔
123	claude: claude,	22✔
124	codex: codex,	22✔
125	iterationDelay: iterDelay,	22✔
126	taskRetryCount: retryCount,	22✔
127	}	22✔
128	}
129
130	// Run executes the main loop based on configured mode.
131	func (r *Runner) Run(ctx context.Context) error {	15✔
132	switch r.cfg.Mode {	15✔
133	case ModeFull:	8✔
134	return r.runFull(ctx)	8✔
135	case ModeReview:	3✔
136	return r.runReviewOnly(ctx)	3✔
137	case ModeCodexOnly:	3✔
138	return r.runCodexOnly(ctx)	3✔
139	default:	1✔
140	return fmt.Errorf("unknown mode: %s", r.cfg.Mode)	1✔
141	}
142	}
143
144	// runFull executes the complete pipeline: tasks → review → codex → review.
145	func (r *Runner) runFull(ctx context.Context) error {	8✔
146	if r.cfg.PlanFile == "" {	9✔
147	return errors.New("plan file required for full mode")	1✔
148	}	1✔
149
150	// phase 1: task execution
151	r.log.SetPhase(PhaseTask)	7✔
152	r.log.PrintRaw("starting task execution phase\n")	7✔
153		7✔
154	if err := r.runTaskPhase(ctx); err != nil {	12✔
155	return fmt.Errorf("task phase: %w", err)	5✔
156	}	5✔
157
158	// phase 2: first review pass - address ALL findings
159	r.log.SetPhase(PhaseReview)	2✔
160	r.log.PrintSection(NewGenericSection("claude review 0: all findings"))	2✔
161		2✔
162	if err := r.runClaudeReview(ctx, r.buildFirstReviewPrompt()); err != nil {	2✔
163	return fmt.Errorf("first review: %w", err)	×
164	}	×
165
166	// phase 2.1: claude review loop (critical/major) before codex
167	if err := r.runClaudeReviewLoop(ctx); err != nil {	2✔
168	return fmt.Errorf("pre-codex review loop: %w", err)	×
169	}	×
170
171	// phase 2.5: codex external review loop
172	r.log.SetPhase(PhaseCodex)	2✔
173	r.log.PrintSection(NewGenericSection("codex external review"))	2✔
174		2✔
175	if err := r.runCodexLoop(ctx); err != nil {	2✔
176	return fmt.Errorf("codex loop: %w", err)	×
177	}	×
178
179	// phase 3: claude review loop (critical/major) after codex
180	r.log.SetPhase(PhaseReview)	2✔
181		2✔
182	if err := r.runClaudeReviewLoop(ctx); err != nil {	2✔
183	return fmt.Errorf("post-codex review loop: %w", err)	×
184	}	×
185
186	r.log.Print("all phases completed successfully")	2✔
187	return nil	2✔
188	}
189
190	// runReviewOnly executes only the review pipeline: review → codex → review.
191	func (r *Runner) runReviewOnly(ctx context.Context) error {	3✔
192	// phase 1: first review	3✔
193	r.log.SetPhase(PhaseReview)	3✔
194	r.log.PrintSection(NewGenericSection("claude review 0: all findings"))	3✔
195		3✔
196	if err := r.runClaudeReview(ctx, r.buildFirstReviewPrompt()); err != nil {	4✔
197	return fmt.Errorf("first review: %w", err)	1✔
198	}	1✔
199
200	// phase 1.1: claude review loop (critical/major) before codex
201	if err := r.runClaudeReviewLoop(ctx); err != nil {	2✔
202	return fmt.Errorf("pre-codex review loop: %w", err)	×
203	}	×
204
205	// phase 2: codex external review loop
206	r.log.SetPhase(PhaseCodex)	2✔
207	r.log.PrintSection(NewGenericSection("codex external review"))	2✔
208		2✔
209	if err := r.runCodexLoop(ctx); err != nil {	3✔
210	return fmt.Errorf("codex loop: %w", err)	1✔
211	}	1✔
212
213	// phase 3: claude review loop (critical/major) after codex
214	r.log.SetPhase(PhaseReview)	1✔
215		1✔
216	if err := r.runClaudeReviewLoop(ctx); err != nil {	1✔
217	return fmt.Errorf("post-codex review loop: %w", err)	×
218	}	×
219
220	r.log.Print("review phases completed successfully")	1✔
221	return nil	1✔
222	}
223
224	// runCodexOnly executes only the codex pipeline: codex → review.
225	func (r *Runner) runCodexOnly(ctx context.Context) error {	3✔
226	// phase 1: codex external review loop	3✔
227	r.log.SetPhase(PhaseCodex)	3✔
228	r.log.PrintSection(NewGenericSection("codex external review"))	3✔
229		3✔
230	if err := r.runCodexLoop(ctx); err != nil {	3✔
231	return fmt.Errorf("codex loop: %w", err)	×
232	}	×
233
234	// phase 2: claude review loop (critical/major) after codex
235	r.log.SetPhase(PhaseReview)	3✔
236		3✔
237	if err := r.runClaudeReviewLoop(ctx); err != nil {	3✔
238	return fmt.Errorf("post-codex review loop: %w", err)	×
239	}	×
240
241	r.log.Print("codex phases completed successfully")	3✔
242	return nil	3✔
243	}
244
245	// runTaskPhase executes tasks until completion or max iterations.
246	// executes ONE Task section per iteration.
247	func (r *Runner) runTaskPhase(ctx context.Context) error {	7✔
248	prompt := r.buildTaskPrompt()	7✔
249	retryCount := 0	7✔
250		7✔
251	for i := 1; i <= r.cfg.MaxIterations; i++ {	18✔
252	select {	11✔
253	case <-ctx.Done():	1✔
254	return fmt.Errorf("task phase: %w", ctx.Err())	1✔
255	default:	10✔
256	}
257
258	r.log.PrintSection(NewTaskIterationSection(i))	10✔
259		10✔
260	result := r.claude.Run(ctx, prompt)	10✔
261	if result.Error != nil {	11✔
262	return fmt.Errorf("claude execution: %w", result.Error)	1✔
263	}	1✔
264
265	if result.Signal == SignalCompleted {	11✔
266	// verify plan actually has no uncompleted checkboxes	2✔
267	if r.hasUncompletedTasks() {	2✔
268	r.log.Print("warning: completion signal received but plan still has [ ] items, continuing...")	×
269	continue	×
270	}
271	r.log.PrintRaw("\nall tasks completed, starting code review...\n")	2✔
272	return nil	2✔
273	}
274
275	if result.Signal == SignalFailed {	11✔
276	if retryCount < r.taskRetryCount {	6✔
277	r.log.Print("task failed, retrying...")	2✔
278	retryCount++	2✔
279	time.Sleep(r.iterationDelay)	2✔
280	continue	2✔
281	}
282	return errors.New("task execution failed after retry (FAILED signal received)")	2✔
283	}
284
285	retryCount = 0	3✔
286	// continue with same prompt - it reads from plan file each time	3✔
287	time.Sleep(r.iterationDelay)	3✔
288	}
289
290	return fmt.Errorf("max iterations (%d) reached without completion", r.cfg.MaxIterations)	1✔
291	}
292
293	// runClaudeReview runs Claude review with the given prompt until REVIEW_DONE.
294	func (r *Runner) runClaudeReview(ctx context.Context, prompt string) error {	5✔
295	result := r.claude.Run(ctx, prompt)	5✔
296	if result.Error != nil {	5✔
297	return fmt.Errorf("claude execution: %w", result.Error)	×
298	}	×
299
300	if result.Signal == SignalFailed {	6✔
301	return errors.New("review failed (FAILED signal received)")	1✔
302	}	1✔
303
304	if !IsReviewDone(result.Signal) {	4✔
305	r.log.Print("warning: first review pass did not complete cleanly, continuing...")	×
306	}	×
307
308	return nil	4✔
309	}
310
311	// runClaudeReviewLoop runs claude review iterations using second review prompt.
312	func (r *Runner) runClaudeReviewLoop(ctx context.Context) error {	10✔
313	// review iterations = 10% of max_iterations (min 3)	10✔
314	maxReviewIterations := max(3, r.cfg.MaxIterations/10)	10✔
315		10✔
316	for i := 1; i <= maxReviewIterations; i++ {	20✔
317	select {	10✔
318	case <-ctx.Done():	×
319	return fmt.Errorf("review: %w", ctx.Err())	×
320	default:	10✔
321	}
322
323	r.log.PrintSection(NewClaudeReviewSection(i, ": critical/major"))	10✔
324		10✔
325	result := r.claude.Run(ctx, r.buildSecondReviewPrompt())	10✔
326	if result.Error != nil {	10✔
327	return fmt.Errorf("claude execution: %w", result.Error)	×
328	}	×
329
330	if result.Signal == SignalFailed {	10✔
331	return errors.New("review failed (FAILED signal received)")	×
332	}	×
333
334	if IsReviewDone(result.Signal) {	20✔
335	r.log.Print("claude review complete - no more findings")	10✔
336	return nil	10✔
337	}	10✔
338
339	r.log.Print("issues fixed, running another review iteration...")	×
340	time.Sleep(r.iterationDelay)	×
341	}
342
343	r.log.Print("max claude review iterations reached, continuing...")	×
344	return nil	×
345	}
346
347	// runCodexLoop runs the codex-claude review loop until no findings.
348	func (r *Runner) runCodexLoop(ctx context.Context) error {	7✔
349	// skip codex phase if disabled	7✔
350	if !r.cfg.CodexEnabled {	8✔
351	r.log.Print("codex review disabled, skipping...")	1✔
352	return nil	1✔
353	}	1✔
354
355	// codex iterations = 20% of max_iterations (min 3)
356	maxCodexIterations := max(3, r.cfg.MaxIterations/5)	6✔
357		6✔
358	var claudeResponse string // first iteration has no prior response	6✔
359		6✔
360	for i := 1; i <= maxCodexIterations; i++ {	12✔
361	select {	6✔
362	case <-ctx.Done():	×
363	return fmt.Errorf("codex loop: %w", ctx.Err())	×
364	default:	6✔
365	}
366
367	r.log.PrintSection(NewCodexIterationSection(i))	6✔
368		6✔
369	// run codex analysis	6✔
370	codexResult := r.codex.Run(ctx, r.buildCodexPrompt(i == 1, claudeResponse))	6✔
371	if codexResult.Error != nil {	7✔
372	return fmt.Errorf("codex execution: %w", codexResult.Error)	1✔
373	}	1✔
374
375	if codexResult.Output == "" {	7✔
376	r.log.Print("codex review returned no output, skipping...")	2✔
377	break	2✔
378	}
379
380	// show codex findings summary before Claude evaluation
381	r.showCodexSummary(codexResult.Output)	3✔
382		3✔
383	// pass codex output to claude for evaluation and fixing	3✔
384	r.log.SetPhase(PhaseClaudeEval)	3✔
385	r.log.PrintSection(NewClaudeEvalSection())	3✔
386	claudeResult := r.claude.Run(ctx, r.buildCodexEvaluationPrompt(codexResult.Output))	3✔
387		3✔
388	// restore codex phase for next iteration	3✔
389	r.log.SetPhase(PhaseCodex)	3✔
390	if claudeResult.Error != nil {	3✔
391	return fmt.Errorf("claude execution: %w", claudeResult.Error)	×
392	}	×
393
394	claudeResponse = claudeResult.Output	3✔
395		3✔
396	// exit only when claude sees "no findings" from codex	3✔
397	if IsCodexDone(claudeResult.Signal) {	6✔
398	r.log.Print("codex review complete - no more findings")	3✔
399	return nil	3✔
400	}	3✔
401
402	time.Sleep(r.iterationDelay)	×
403	}
404
405	r.log.Print("max codex iterations reached, continuing to next phase...")	2✔
406	return nil	2✔
407	}
408
409	// buildCodexPrompt creates the prompt for codex review.
410	func (r *Runner) buildCodexPrompt(isFirst bool, claudeResponse string) string {	6✔
411	// build plan context if available	6✔
412	planContext := ""	6✔
413	if r.cfg.PlanFile != "" {	8✔
414	planContext = fmt.Sprintf(`	2✔
415	## Plan Context	2✔
416	The code implements the plan at: %s	2✔
417		2✔
418	---	2✔
419	`, r.cfg.PlanFile)	2✔
420	}	2✔
421
422	// different diff command based on iteration
423	var diffInstruction, diffDescription string	6✔
424	if isFirst {	12✔
425	diffInstruction = "Run: git diff master...HEAD"	6✔
426	diffDescription = "code changes between master and HEAD branch"	6✔
427	} else {	6✔
428	diffInstruction = "Run: git diff"	×
429	diffDescription = "uncommitted changes (Claude's fixes from previous iteration)"	×
430	}	×
431
432	basePrompt := fmt.Sprintf(`%sReview the %s.	6✔
433		6✔
434	%s	6✔
435		6✔
436	Analyze for:	6✔
437	- Bugs and logic errors	6✔
438	- Security vulnerabilities	6✔
439	- Race conditions	6✔
440	- Error handling gaps	6✔
441	- Code quality issues	6✔
442		6✔
443	Report findings with file:line references. If no issues found, say "NO ISSUES FOUND".`, planContext, diffDescription, diffInstruction)	6✔
444		6✔
445	if claudeResponse != "" {	6✔
446	return fmt.Sprintf(`%s	×
447		×
448	---	×
449	PREVIOUS REVIEW CONTEXT:	×
450	Claude (previous reviewer) responded to your findings:	×
451		×
452	%s	×
453		×
454	Re-evaluate considering Claude's arguments. If Claude's fixes are correct, acknowledge them.	×
455	If Claude's arguments are invalid, explain why the issues still exist.`, basePrompt, claudeResponse)	×
456	}	×
457
458	return basePrompt	6✔
459	}
460
461	// hasUncompletedTasks checks if plan file has any uncompleted checkboxes.
462	// Checks both original path and completed/ subdirectory.
463	func (r *Runner) hasUncompletedTasks() bool {	6✔
464	// try original path first	6✔
465	content, err := os.ReadFile(r.cfg.PlanFile)	6✔
466	if err != nil {	6✔
UNCOV 467	// try completed/ subdirectory as fallback	×
UNCOV 468	completedPath := filepath.Join(filepath.Dir(r.cfg.PlanFile), "completed", filepath.Base(r.cfg.PlanFile))	×
UNCOV 469	content, err = os.ReadFile(completedPath) //nolint:gosec // planFile from CLI args	×
UNCOV 470	if err != nil {	×
UNCOV 471	return true // assume incomplete if can't read from either location	×
UNCOV 472	}	×
473	}
474
475	// look for uncompleted checkbox pattern: [ ] (not [x])
476	for line := range strings.SplitSeq(string(content), "\n") {	21✔
477	trimmed := strings.TrimSpace(line)	15✔
478	if strings.HasPrefix(trimmed, "- [ ]") {	17✔
479	return true	2✔
480	}	2✔
481	}
482	return false	4✔
483	}
484
485	// showCodexSummary displays a condensed summary of codex output before Claude evaluation.
486	// extracts text until first code block or 500 chars, whichever is shorter.
487	func (r *Runner) showCodexSummary(output string) {	3✔
488	summary := output	3✔
489		3✔
490	// trim to first code block if present	3✔
491	if idx := strings.Index(summary, "```"); idx > 0 {	3✔
492	summary = summary[:idx]	×
493	}	×
494
495	// limit to 5000 chars
496	if len(summary) > 5000 {	3✔
497	summary = summary[:5000] + "..."	×
498	}	×
499
500	summary = strings.TrimSpace(summary)	3✔
501	if summary == "" {	3✔
502	return	×
503	}	×
504
505	r.log.Print("codex findings:")	3✔
506	for line := range strings.SplitSeq(summary, "\n") {	6✔
507	if strings.TrimSpace(line) == "" {	3✔
508	continue	×
509	}
510	r.log.PrintAligned(" " + line)	3✔
511	}
512	}

umputun / ralphex / 21305559204

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous