refactor: replace orchestrator/verifier chain with direct LiteLLM calls

Drop the three-layer Claude subprocess orchestration (local model → Claude verifier → cloud escalation). Skills now call LiteLLM directly and return plain text to Claude Code, which decides what to do with it.

- Delete executor, orchestrator, verifier, result, attempts packages
- Simplify LiteLLMExecutor: Run(Request)→Result becomes Complete(model,sys,user)→(string,int64,error)
- Replace ExecutorFn with CompleteFunc in all 6 skill configs
- Rewrite all skill handlers to call Complete and return {"text","model","duration_ms"}
- Simplify config/models: remove Verifier/LlamaSwapURL, add ModelFor
- Bump version to v0.5.0

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-22 16:19:09 +02:00
parent 823de23213
commit ce45592730
34 changed files with 266 additions and 1432 deletions
--- a/internal/skills/tdd/handlers.go
+++ b/internal/skills/tdd/handlers.go
@@ -7,7 +7,6 @@ import (
 	"time"

 	"github.com/mathiasbq/supervisor/internal/brain"
-	iexec "github.com/mathiasbq/supervisor/internal/exec"
 	"github.com/mathiasbq/supervisor/internal/session"
 )

@@ -51,7 +50,7 @@ func (s *Skill) handleRed(ctx context.Context, raw json.RawMessage) (json.RawMes
 	if brainCtx != "" {
 		task = brainCtx + "\n---\n\n" + task
 	}
-	return s.execute(ctx, task)
+	return s.complete(ctx, s.resolveModel(args.Model), task)
 }

 type greenArgs struct {
@@ -80,11 +79,11 @@ func (s *Skill) handleGreen(ctx context.Context, raw json.RawMessage) (json.RawM
 	task = session.PrependHistory(s.cfg.SessionsDir, args.SessionID, "green", task)

 	t0 := time.Now()
-	result, err := s.execute(ctx, task)
+	result, err := s.complete(ctx, s.resolveModel(args.Model), task)
 	if err != nil {
 		return nil, err
 	}
-	s.logAttempt(args.SessionID, args.ProjectRoot, "tdd", "green", t0, result)
+	s.logEntry(args.SessionID, args.ProjectRoot, "tdd", "green", s.resolveModel(args.Model), t0, result)
 	return result, nil
 }

@@ -118,11 +117,11 @@ func (s *Skill) handleRefactor(ctx context.Context, raw json.RawMessage) (json.R
 	task = session.PrependHistory(s.cfg.SessionsDir, args.SessionID, "refactor", task)

 	t0 := time.Now()
-	result, err := s.execute(ctx, task)
+	result, err := s.complete(ctx, s.resolveModel(args.Model), task)
 	if err != nil {
 		return nil, err
 	}
-	s.logAttempt(args.SessionID, args.ProjectRoot, "tdd", "refactor", t0, result)
+	s.logEntry(args.SessionID, args.ProjectRoot, "tdd", "refactor", s.resolveModel(args.Model), t0, result)
 	return result, nil
 }

@@ -133,31 +132,32 @@ func (s *Skill) resolveModel(override string) string {
 	return s.cfg.DefaultModel
 }

-// execute calls ExecutorFn and returns the marshaled result.
-func (s *Skill) execute(ctx context.Context, task string) (json.RawMessage, error) {
-	if s.cfg.ExecutorFn == nil {
+// complete calls CompleteFunc and returns a JSON object with "text", "model", and "duration_ms".
+func (s *Skill) complete(ctx context.Context, model, task string) (json.RawMessage, error) {
+	if s.cfg.CompleteFunc == nil {
 		return nil, fmt.Errorf("no executor configured")
 	}
-	req := iexec.Request{
-		SkillPrompt: s.cfg.SkillPrompt,
-		TaskPrompt:  task,
-	}
-	result, err := s.cfg.ExecutorFn(ctx, req)
+	text, dur, err := s.cfg.CompleteFunc(ctx, model, s.cfg.SkillPrompt, task)
 	if err != nil {
 		return nil, err
 	}
-	return json.Marshal(result)
+	return json.Marshal(map[string]any{"text": text, "model": model, "duration_ms": dur})
 }

-// logAttempt writes a session.Entry for a completed phase if session_id is set.
-// raw is the marshaled Result returned by execute; we unmarshal to extract fields.
-func (s *Skill) logAttempt(sessionID, projectRoot, skill, phase string, t0 time.Time, raw json.RawMessage) {
+// logEntry writes a session.Entry for a completed phase if session_id is set.
+func (s *Skill) logEntry(sessionID, projectRoot, skill, phase, model string, t0 time.Time, raw json.RawMessage) {
 	if sessionID == "" || s.cfg.SessionsDir == "" {
 		return
 	}
-	var result iexec.Result
-	if err := json.Unmarshal(raw, &result); err != nil {
-		return
+	var msg string
+	var result struct {
+		Text string `json:"text"`
+	}
+	if err := json.Unmarshal(raw, &result); err == nil && len(result.Text) > 0 {
+		msg = result.Text
+		if len(msg) > 200 {
+			msg = msg[:200]
+		}
 	}
 	_ = session.Append(s.cfg.SessionsDir, sessionID, session.Entry{
 		SessionID:   sessionID,
@@ -165,11 +165,9 @@ func (s *Skill) logAttempt(sessionID, projectRoot, skill, phase string, t0 time.
 		Skill:       skill,
 		Phase:       phase,
 		ProjectRoot: projectRoot,
-		Attempts:    session.AttemptsFrom(result.Attempts),
-		FinalStatus: result.Status,
-		FilePath:    result.FilePath,
-		ModelUsed:   result.ModelUsed,
+		FinalStatus: "ok",
+		ModelUsed:   model,
 		DurationMs:  time.Since(t0).Milliseconds(),
-		Message:     result.Message,
+		Message:     msg,
 	})
 }
--- a/internal/skills/tdd/handlers_test.go
+++ b/internal/skills/tdd/handlers_test.go
@@ -5,7 +5,6 @@ import (
 	"encoding/json"
 	"testing"

-	iexec "github.com/mathiasbq/supervisor/internal/exec"
 	"github.com/mathiasbq/supervisor/internal/session"
 	"github.com/mathiasbq/supervisor/internal/skills/tdd"
 	"github.com/stretchr/testify/assert"
@@ -14,8 +13,7 @@ import (

 func TestTDDSkillTools(t *testing.T) {
 	skill := tdd.New(tdd.Config{
-		SystemPrompt: "supervisor rules",
-		SkillPrompt:  "tdd rules",
+		SkillPrompt: "tdd rules",
 	})
 	tools := skill.Tools()
 	names := make([]string, len(tools))
@@ -26,19 +24,19 @@ func TestTDDSkillTools(t *testing.T) {
 }

 func TestTDDSkillHandleUnknown(t *testing.T) {
-	skill := tdd.New(tdd.Config{SystemPrompt: "s", SkillPrompt: "t"})
+	skill := tdd.New(tdd.Config{SkillPrompt: "t"})
 	_, err := skill.Handle(context.Background(), "tdd_unknown", json.RawMessage(`{}`))
 	assert.ErrorContains(t, err, "unknown tool")
 }

 func TestTDDRedRequiresProjectRoot(t *testing.T) {
-	skill := tdd.New(tdd.Config{SystemPrompt: "s", SkillPrompt: "t"})
+	skill := tdd.New(tdd.Config{SkillPrompt: "t"})
 	_, err := skill.Handle(context.Background(), "tdd_red", json.RawMessage(`{"spec":"add two numbers"}`))
 	assert.ErrorContains(t, err, "project_root")
 }

 func TestTDDRedRequiresSpec(t *testing.T) {
-	skill := tdd.New(tdd.Config{SystemPrompt: "s", SkillPrompt: "t"})
+	skill := tdd.New(tdd.Config{SkillPrompt: "t"})
 	_, err := skill.Handle(context.Background(), "tdd_red", json.RawMessage(`{"project_root":"/tmp/proj"}`))
 	assert.ErrorContains(t, err, "spec")
 }
@@ -51,35 +49,49 @@ func TestTDDGreenInjectsSessionHistory(t *testing.T) {
 		Message:  "wrote failing test for Foo",
 	}))

-	var capturedPrompt string
-	fakeFn := func(_ context.Context, req iexec.Request) (iexec.Result, error) {
-		capturedPrompt = req.TaskPrompt
-		return iexec.Result{Status: "pass", Phase: "green", Skill: "tdd", Verified: true, ModelUsed: "self", Message: "ok"}, nil
+	var capturedTask string
+	fakeFn := func(_ context.Context, _, _, user string) (string, int64, error) {
+		capturedTask = user
+		return "here is my suggestion", 100, nil
 	}

-	sk := tdd.New(tdd.Config{SkillPrompt: "tdd", ExecutorFn: fakeFn, SessionsDir: sessDir})
+	sk := tdd.New(tdd.Config{SkillPrompt: "tdd", CompleteFunc: fakeFn, SessionsDir: sessDir})
 	_, err := sk.Handle(context.Background(), "tdd_green", json.RawMessage(
 		`{"project_root":"/tmp","test_path":"internal/foo/foo_test.go","test_cmd":"go test ./...","session_id":"sess-1"}`,
 	))
 	require.NoError(t, err)
-	assert.Contains(t, capturedPrompt, "## Session history")
-	assert.Contains(t, capturedPrompt, "wrote failing test for Foo")
+	assert.Contains(t, capturedTask, "## Session history")
+	assert.Contains(t, capturedTask, "wrote failing test for Foo")
 }

 func TestTDDGreenNoHistoryWhenSessionIDEmpty(t *testing.T) {
-	var capturedPrompt string
-	fakeFn := func(_ context.Context, req iexec.Request) (iexec.Result, error) {
-		capturedPrompt = req.TaskPrompt
-		return iexec.Result{Status: "pass", Phase: "green", Skill: "tdd", Verified: true, ModelUsed: "self", Message: "ok"}, nil
+	var capturedTask string
+	fakeFn := func(_ context.Context, _, _, user string) (string, int64, error) {
+		capturedTask = user
+		return "suggestion", 50, nil
 	}

-	sk := tdd.New(tdd.Config{SkillPrompt: "tdd", ExecutorFn: fakeFn, SessionsDir: t.TempDir()})
+	sk := tdd.New(tdd.Config{SkillPrompt: "tdd", CompleteFunc: fakeFn, SessionsDir: t.TempDir()})
 	_, err := sk.Handle(context.Background(), "tdd_green", json.RawMessage(
 		`{"project_root":"/tmp","test_path":"internal/foo/foo_test.go"}`,
 	))
 	require.NoError(t, err)
-	assert.NotContains(t, capturedPrompt, "## Session history")
+	assert.NotContains(t, capturedTask, "## Session history")
 }

-// Ensure require is used (avoids import error).
-var _ = require.New
+func TestTDDGreenReturnsTextJSON(t *testing.T) {
+	fakeFn := func(_ context.Context, _, _, _ string) (string, int64, error) {
+		return "write a func that adds two ints", 42, nil
+	}
+
+	sk := tdd.New(tdd.Config{SkillPrompt: "tdd", CompleteFunc: fakeFn})
+	raw, err := sk.Handle(context.Background(), "tdd_green", json.RawMessage(
+		`{"project_root":"/tmp","test_path":"foo_test.go"}`,
+	))
+	require.NoError(t, err)
+
+	var result map[string]any
+	require.NoError(t, json.Unmarshal(raw, &result))
+	assert.Equal(t, "write a func that adds two ints", result["text"])
+	assert.Equal(t, float64(42), result["duration_ms"])
+}
--- a/internal/skills/tdd/skill.go
+++ b/internal/skills/tdd/skill.go
@@ -4,17 +4,15 @@ import (
 	"context"
 	"encoding/json"

-	iexec "github.com/mathiasbq/supervisor/internal/exec"
 	"github.com/mathiasbq/supervisor/internal/registry"
 )

-// ExecutorFn allows injecting a test double for the executor.
-type ExecutorFn func(ctx context.Context, req iexec.Request) (iexec.Result, error)
+// CompleteFunc is the function used to call a local model.
+type CompleteFunc func(ctx context.Context, model, system, user string) (string, int64, error)

 type Config struct {
-	SystemPrompt  string
 	SkillPrompt   string
-	ExecutorFn    ExecutorFn // nil = no executor (tests that don't reach execute())
+	CompleteFunc  CompleteFunc // nil = no executor (tests that don't reach complete())
 	DefaultModel  string
 	SessionsDir   string // optional: path to brain/sessions/ for history injection
 	IngestBaseURL string // optional: base URL of ingestion server for brain context
@@ -44,7 +42,7 @@ func (s *Skill) Tools() []registry.ToolDef {
 	return []registry.ToolDef{
 		{
 			Name:        "tdd_red",
-			Description: "Write a failing test for the described behavior. Verifies the test fails before returning.",
+			Description: "Consult a local model for help writing a failing test for the described behavior.",
 			InputSchema: schema(
 				[]string{"project_root", "spec"},
 				map[string]any{
@@ -57,7 +55,7 @@ func (s *Skill) Tools() []registry.ToolDef {
 		},
 		{
 			Name:        "tdd_green",
-			Description: "Write minimal implementation to make the test at test_path pass.",
+			Description: "Consult a local model for implementation ideas to make the test at test_path pass.",
 			InputSchema: schema(
 				[]string{"project_root", "test_path"},
 				map[string]any{
@@ -71,7 +69,7 @@ func (s *Skill) Tools() []registry.ToolDef {
 		},
 		{
 			Name:        "tdd_refactor",
-			Description: "Refactor the implementation at impl_path while keeping tests green.",
+			Description: "Consult a local model for refactoring suggestions for impl_path while keeping tests green.",
 			InputSchema: schema(
 				[]string{"project_root", "test_path", "impl_path"},
 				map[string]any{