refactor: replace orchestrator/verifier chain with direct LiteLLM calls
All checks were successful
cd / Build and deploy (push) Successful in 6s
CI / Lint / Test / Vet (push) Successful in 10s
CI / Mirror to GitHub (push) Successful in 3s

Drop the three-layer Claude subprocess orchestration (local model →
Claude verifier → cloud escalation). Skills now call LiteLLM directly
and return plain text to Claude Code, which decides what to do with it.

- Delete executor, orchestrator, verifier, result, attempts packages
- Simplify LiteLLMExecutor: Run(Request)→Result becomes Complete(model,sys,user)→(string,int64,error)
- Replace ExecutorFn with CompleteFunc in all 6 skill configs
- Rewrite all skill handlers to call Complete and return {"text","model","duration_ms"}
- Simplify config/models: remove Verifier/LlamaSwapURL, add ModelFor
- Bump version to v0.5.0

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Mathias Bergqvist
2026-04-22 16:19:09 +02:00
parent 823de23213
commit ce45592730
34 changed files with 266 additions and 1432 deletions

View File

@@ -7,7 +7,6 @@ import (
"time"
"github.com/mathiasbq/supervisor/internal/brain"
iexec "github.com/mathiasbq/supervisor/internal/exec"
"github.com/mathiasbq/supervisor/internal/session"
)
@@ -51,7 +50,7 @@ func (s *Skill) handleRed(ctx context.Context, raw json.RawMessage) (json.RawMes
if brainCtx != "" {
task = brainCtx + "\n---\n\n" + task
}
return s.execute(ctx, task)
return s.complete(ctx, s.resolveModel(args.Model), task)
}
type greenArgs struct {
@@ -80,11 +79,11 @@ func (s *Skill) handleGreen(ctx context.Context, raw json.RawMessage) (json.RawM
task = session.PrependHistory(s.cfg.SessionsDir, args.SessionID, "green", task)
t0 := time.Now()
result, err := s.execute(ctx, task)
result, err := s.complete(ctx, s.resolveModel(args.Model), task)
if err != nil {
return nil, err
}
s.logAttempt(args.SessionID, args.ProjectRoot, "tdd", "green", t0, result)
s.logEntry(args.SessionID, args.ProjectRoot, "tdd", "green", s.resolveModel(args.Model), t0, result)
return result, nil
}
@@ -118,11 +117,11 @@ func (s *Skill) handleRefactor(ctx context.Context, raw json.RawMessage) (json.R
task = session.PrependHistory(s.cfg.SessionsDir, args.SessionID, "refactor", task)
t0 := time.Now()
result, err := s.execute(ctx, task)
result, err := s.complete(ctx, s.resolveModel(args.Model), task)
if err != nil {
return nil, err
}
s.logAttempt(args.SessionID, args.ProjectRoot, "tdd", "refactor", t0, result)
s.logEntry(args.SessionID, args.ProjectRoot, "tdd", "refactor", s.resolveModel(args.Model), t0, result)
return result, nil
}
@@ -133,31 +132,32 @@ func (s *Skill) resolveModel(override string) string {
return s.cfg.DefaultModel
}
// execute calls ExecutorFn and returns the marshaled result.
func (s *Skill) execute(ctx context.Context, task string) (json.RawMessage, error) {
if s.cfg.ExecutorFn == nil {
// complete calls CompleteFunc with the skill prompt and the task, and returns a
// JSON object with the "text", "model", and "duration_ms" fields.
func (s *Skill) complete(ctx context.Context, model, task string) (json.RawMessage, error) {
if s.cfg.CompleteFunc == nil {
return nil, fmt.Errorf("no executor configured")
}
req := iexec.Request{
SkillPrompt: s.cfg.SkillPrompt,
TaskPrompt: task,
}
result, err := s.cfg.ExecutorFn(ctx, req)
text, dur, err := s.cfg.CompleteFunc(ctx, model, s.cfg.SkillPrompt, task)
if err != nil {
return nil, err
}
return json.Marshal(result)
return json.Marshal(map[string]any{"text": text, "model": model, "duration_ms": dur})
}
// logAttempt writes a session.Entry for a completed phase if session_id is set.
// raw is the marshaled Result returned by execute; we unmarshal to extract fields.
func (s *Skill) logAttempt(sessionID, projectRoot, skill, phase string, t0 time.Time, raw json.RawMessage) {
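// logEntry appends a session.Entry for a completed phase when both session_id and
// SessionsDir are set. The entry's message is the result text truncated to 200 bytes.
// NOTE(review): the truncation is byte-based, so a multi-byte rune can be split.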
func (s *Skill) logEntry(sessionID, projectRoot, skill, phase, model string, t0 time.Time, raw json.RawMessage) {
if sessionID == "" || s.cfg.SessionsDir == "" {
return
}
var result iexec.Result
if err := json.Unmarshal(raw, &result); err != nil {
return
var msg string
var result struct {
Text string `json:"text"`
}
if err := json.Unmarshal(raw, &result); err == nil && len(result.Text) > 0 {
msg = result.Text
if len(msg) > 200 {
msg = msg[:200]
}
}
_ = session.Append(s.cfg.SessionsDir, sessionID, session.Entry{
SessionID: sessionID,
@@ -165,11 +165,9 @@ func (s *Skill) logAttempt(sessionID, projectRoot, skill, phase string, t0 time.
Skill: skill,
Phase: phase,
ProjectRoot: projectRoot,
Attempts: session.AttemptsFrom(result.Attempts),
FinalStatus: result.Status,
FilePath: result.FilePath,
ModelUsed: result.ModelUsed,
FinalStatus: "ok",
ModelUsed: model,
DurationMs: time.Since(t0).Milliseconds(),
Message: result.Message,
Message: msg,
})
}

View File

@@ -5,7 +5,6 @@ import (
"encoding/json"
"testing"
iexec "github.com/mathiasbq/supervisor/internal/exec"
"github.com/mathiasbq/supervisor/internal/session"
"github.com/mathiasbq/supervisor/internal/skills/tdd"
"github.com/stretchr/testify/assert"
@@ -14,8 +13,7 @@ import (
func TestTDDSkillTools(t *testing.T) {
skill := tdd.New(tdd.Config{
SystemPrompt: "supervisor rules",
SkillPrompt: "tdd rules",
SkillPrompt: "tdd rules",
})
tools := skill.Tools()
names := make([]string, len(tools))
@@ -26,19 +24,19 @@ func TestTDDSkillTools(t *testing.T) {
}
func TestTDDSkillHandleUnknown(t *testing.T) {
skill := tdd.New(tdd.Config{SystemPrompt: "s", SkillPrompt: "t"})
skill := tdd.New(tdd.Config{SkillPrompt: "t"})
_, err := skill.Handle(context.Background(), "tdd_unknown", json.RawMessage(`{}`))
assert.ErrorContains(t, err, "unknown tool")
}
func TestTDDRedRequiresProjectRoot(t *testing.T) {
skill := tdd.New(tdd.Config{SystemPrompt: "s", SkillPrompt: "t"})
skill := tdd.New(tdd.Config{SkillPrompt: "t"})
_, err := skill.Handle(context.Background(), "tdd_red", json.RawMessage(`{"spec":"add two numbers"}`))
assert.ErrorContains(t, err, "project_root")
}
func TestTDDRedRequiresSpec(t *testing.T) {
skill := tdd.New(tdd.Config{SystemPrompt: "s", SkillPrompt: "t"})
skill := tdd.New(tdd.Config{SkillPrompt: "t"})
_, err := skill.Handle(context.Background(), "tdd_red", json.RawMessage(`{"project_root":"/tmp/proj"}`))
assert.ErrorContains(t, err, "spec")
}
@@ -51,35 +49,49 @@ func TestTDDGreenInjectsSessionHistory(t *testing.T) {
Message: "wrote failing test for Foo",
}))
var capturedPrompt string
fakeFn := func(_ context.Context, req iexec.Request) (iexec.Result, error) {
capturedPrompt = req.TaskPrompt
return iexec.Result{Status: "pass", Phase: "green", Skill: "tdd", Verified: true, ModelUsed: "self", Message: "ok"}, nil
var capturedTask string
fakeFn := func(_ context.Context, _, _, user string) (string, int64, error) {
capturedTask = user
return "here is my suggestion", 100, nil
}
sk := tdd.New(tdd.Config{SkillPrompt: "tdd", ExecutorFn: fakeFn, SessionsDir: sessDir})
sk := tdd.New(tdd.Config{SkillPrompt: "tdd", CompleteFunc: fakeFn, SessionsDir: sessDir})
_, err := sk.Handle(context.Background(), "tdd_green", json.RawMessage(
`{"project_root":"/tmp","test_path":"internal/foo/foo_test.go","test_cmd":"go test ./...","session_id":"sess-1"}`,
))
require.NoError(t, err)
assert.Contains(t, capturedPrompt, "## Session history")
assert.Contains(t, capturedPrompt, "wrote failing test for Foo")
assert.Contains(t, capturedTask, "## Session history")
assert.Contains(t, capturedTask, "wrote failing test for Foo")
}
func TestTDDGreenNoHistoryWhenSessionIDEmpty(t *testing.T) {
var capturedPrompt string
fakeFn := func(_ context.Context, req iexec.Request) (iexec.Result, error) {
capturedPrompt = req.TaskPrompt
return iexec.Result{Status: "pass", Phase: "green", Skill: "tdd", Verified: true, ModelUsed: "self", Message: "ok"}, nil
var capturedTask string
fakeFn := func(_ context.Context, _, _, user string) (string, int64, error) {
capturedTask = user
return "suggestion", 50, nil
}
sk := tdd.New(tdd.Config{SkillPrompt: "tdd", ExecutorFn: fakeFn, SessionsDir: t.TempDir()})
sk := tdd.New(tdd.Config{SkillPrompt: "tdd", CompleteFunc: fakeFn, SessionsDir: t.TempDir()})
_, err := sk.Handle(context.Background(), "tdd_green", json.RawMessage(
`{"project_root":"/tmp","test_path":"internal/foo/foo_test.go"}`,
))
require.NoError(t, err)
assert.NotContains(t, capturedPrompt, "## Session history")
assert.NotContains(t, capturedTask, "## Session history")
}
// Ensure require is used (avoids import error).
var _ = require.New
// TestTDDGreenReturnsTextJSON checks that tdd_green passes the completion text
// and duration through in its JSON response.
func TestTDDGreenReturnsTextJSON(t *testing.T) {
	const (
		wantText = "write a func that adds two ints"
		wantDur  = int64(42)
	)
	stub := func(context.Context, string, string, string) (string, int64, error) {
		return wantText, wantDur, nil
	}
	cfg := tdd.Config{SkillPrompt: "tdd", CompleteFunc: stub}
	args := json.RawMessage(`{"project_root":"/tmp","test_path":"foo_test.go"}`)

	sk := tdd.New(cfg)
	out, err := sk.Handle(context.Background(), "tdd_green", args)
	require.NoError(t, err)

	var decoded map[string]any
	require.NoError(t, json.Unmarshal(out, &decoded))

	// encoding/json decodes JSON numbers into float64 when the target is any.
	assert.Equal(t, wantText, decoded["text"])
	assert.Equal(t, float64(wantDur), decoded["duration_ms"])
}

View File

@@ -4,17 +4,15 @@ import (
"context"
"encoding/json"
iexec "github.com/mathiasbq/supervisor/internal/exec"
"github.com/mathiasbq/supervisor/internal/registry"
)
// ExecutorFn allows injecting a test double for the executor.
type ExecutorFn func(ctx context.Context, req iexec.Request) (iexec.Result, error)
// CompleteFunc is the hook a skill uses to call a local model. It receives the
// model name together with the system and user prompts, and returns the
// model's text reply, an int64 that callers report as duration_ms, and an
// error if the call failed.
type CompleteFunc func(
	ctx context.Context,
	model string,
	system string,
	user string,
) (text string, durationMS int64, err error)
type Config struct {
SystemPrompt string
SkillPrompt string
ExecutorFn ExecutorFn // nil = no executor (tests that don't reach execute())
CompleteFunc CompleteFunc // nil = no completion function configured (tests that don't reach complete())
DefaultModel string
SessionsDir string // optional: path to brain/sessions/ for history injection
IngestBaseURL string // optional: base URL of ingestion server for brain context
@@ -44,7 +42,7 @@ func (s *Skill) Tools() []registry.ToolDef {
return []registry.ToolDef{
{
Name: "tdd_red",
Description: "Write a failing test for the described behavior. Verifies the test fails before returning.",
Description: "Consult a local model for help writing a failing test for the described behavior.",
InputSchema: schema(
[]string{"project_root", "spec"},
map[string]any{
@@ -57,7 +55,7 @@ func (s *Skill) Tools() []registry.ToolDef {
},
{
Name: "tdd_green",
Description: "Write minimal implementation to make the test at test_path pass.",
Description: "Consult a local model for implementation ideas to make the test at test_path pass.",
InputSchema: schema(
[]string{"project_root", "test_path"},
map[string]any{
@@ -71,7 +69,7 @@ func (s *Skill) Tools() []registry.ToolDef {
},
{
Name: "tdd_refactor",
Description: "Refactor the implementation at impl_path while keeping tests green.",
Description: "Consult a local model for refactoring suggestions for impl_path while keeping tests green.",
InputSchema: schema(
[]string{"project_root", "test_path", "impl_path"},
map[string]any{