refactor: replace orchestrator/verifier chain with direct LiteLLM calls
Drop the three-layer Claude subprocess orchestration (local model →
Claude verifier → cloud escalation). Skills now call LiteLLM directly
and return plain text to Claude Code, which decides what to do with it.
- Delete executor, orchestrator, verifier, result, attempts packages
- Simplify LiteLLMExecutor: Run(Request)→Result becomes Complete(model,sys,user)→(string,int64,error)
- Replace ExecutorFn with CompleteFunc in all 6 skill configs
- Rewrite all skill handlers to call Complete and return {"text","model","duration_ms"}
- Simplify config/models: remove Verifier/LlamaSwapURL, add ModelFor
- Bump version to v0.5.0
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,111 +0,0 @@
|
||||
package exec
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Config carries the settings an Executor is built from.
type Config struct {
	// ClaudeBinary is the path to the claude binary; defaults to "claude".
	ClaudeBinary string
	// SystemPrompt is the contents of the supervisor CLAUDE.md.
	SystemPrompt string
	// Timeout is the per-invocation timeout; defaults to 120s.
	Timeout time.Duration
	// LiteLLMBaseURL is passed to Claude so it can delegate to Ollama.
	LiteLLMBaseURL string
	// LiteLLMAPIKey is passed to Claude for LiteLLM auth.
	LiteLLMAPIKey string
}
|
||||
|
||||
// Request describes one supervisor invocation.
type Request struct {
	// SkillPrompt is the skill-specific discipline (e.g. tdd.md contents).
	SkillPrompt string
	// TaskPrompt is the specific task (phase, project_root, spec, model).
	TaskPrompt string
	// Model is the resolved model name, passed in the task prompt.
	Model string
	// Tools is the comma-separated list of allowed tools; defaults to
	// "Bash,Read,Write".
	Tools string
}
|
||||
|
||||
// Executor spawns a claude instance and captures its structured JSON output.
|
||||
type Executor struct {
|
||||
cfg Config
|
||||
}
|
||||
|
||||
func New(cfg Config) *Executor {
|
||||
if cfg.ClaudeBinary == "" {
|
||||
cfg.ClaudeBinary = "claude"
|
||||
}
|
||||
if cfg.Timeout == 0 {
|
||||
cfg.Timeout = 120 * time.Second
|
||||
}
|
||||
return &Executor{cfg: cfg}
|
||||
}
|
||||
|
||||
func (e *Executor) Run(ctx context.Context, req Request) (Result, error) {
|
||||
ctx, cancel := context.WithTimeout(ctx, e.cfg.Timeout)
|
||||
defer cancel()
|
||||
|
||||
tools := req.Tools
|
||||
if tools == "" {
|
||||
tools = "Bash,Read,Write"
|
||||
}
|
||||
|
||||
// Build the full prompt: system rules + skill rules + infra context + task.
|
||||
// LITELLM_API_KEY is injected as a subprocess env var, not in the prompt,
|
||||
// to prevent it appearing in error log output.
|
||||
litellmCtx := fmt.Sprintf("LITELLM_BASE_URL: %s", e.cfg.LiteLLMBaseURL)
|
||||
prompt := strings.Join([]string{
|
||||
e.cfg.SystemPrompt,
|
||||
"---",
|
||||
req.SkillPrompt,
|
||||
"---",
|
||||
litellmCtx,
|
||||
"---",
|
||||
req.TaskPrompt,
|
||||
}, "\n\n")
|
||||
|
||||
args := []string{
|
||||
"--print",
|
||||
"--permission-mode", "bypassPermissions",
|
||||
"--tools", tools,
|
||||
"--json-schema", Schema,
|
||||
"--output-format", "json",
|
||||
}
|
||||
if strings.HasPrefix(req.Model, "claude-") {
|
||||
args = append(args, "--model", req.Model)
|
||||
}
|
||||
args = append(args, prompt)
|
||||
|
||||
cmd := exec.CommandContext(ctx, e.cfg.ClaudeBinary, args...)
|
||||
cmd.Env = append(os.Environ(), "LITELLM_API_KEY="+e.cfg.LiteLLMAPIKey)
|
||||
var stdout, stderr bytes.Buffer
|
||||
cmd.Stdout = &stdout
|
||||
cmd.Stderr = &stderr
|
||||
|
||||
if err := cmd.Run(); err != nil {
|
||||
if ctx.Err() != nil {
|
||||
return Result{}, fmt.Errorf("timeout after %s", e.cfg.Timeout)
|
||||
}
|
||||
return Result{}, fmt.Errorf("claude exited with error: %w — stderr: %s", err, stderr.String())
|
||||
}
|
||||
|
||||
// --output-format json wraps the response in an envelope; structured output
|
||||
// from --json-schema is in the "structured_output" field.
|
||||
var envelope struct {
|
||||
StructuredOutput *Result `json:"structured_output"`
|
||||
IsError bool `json:"is_error"`
|
||||
Result string `json:"result"` // fallback text result for error messages
|
||||
}
|
||||
if err := json.Unmarshal(stdout.Bytes(), &envelope); err != nil {
|
||||
return Result{}, fmt.Errorf("parse envelope JSON: %w — raw: %s — stderr: %s", err, stdout.String(), stderr.String())
|
||||
}
|
||||
if envelope.StructuredOutput == nil {
|
||||
return Result{}, fmt.Errorf("no structured_output in response — result: %s — stderr: %s", envelope.Result, stderr.String())
|
||||
}
|
||||
if err := envelope.StructuredOutput.Validate(); err != nil {
|
||||
return Result{}, fmt.Errorf("invalid result: %w", err)
|
||||
}
|
||||
return *envelope.StructuredOutput, nil
|
||||
}
|
||||
@@ -1,132 +0,0 @@
|
||||
package exec_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
// fakeClaudePath writes a shell script that prints fixed output and returns its path.
|
||||
func fakeClaudePath(t *testing.T, output string, exitCode int) string {
|
||||
t.Helper()
|
||||
dir := t.TempDir()
|
||||
script := filepath.Join(dir, "claude")
|
||||
var content string
|
||||
if exitCode != 0 {
|
||||
content = "#!/bin/sh\necho 'error' >&2\nexit 1\n"
|
||||
} else {
|
||||
content = "#!/bin/sh\necho '" + output + "'\n"
|
||||
}
|
||||
require.NoError(t, os.WriteFile(script, []byte(content), 0755))
|
||||
return script
|
||||
}
|
||||
|
||||
func TestExecutorParsesValidResult(t *testing.T) {
|
||||
// Fake claude emits the --output-format json envelope that the real CLI produces.
|
||||
// The executor extracts the result from the "structured_output" field.
|
||||
envelope := `{"type":"result","subtype":"success","is_error":false,"structured_output":{"status":"pass","phase":"red","skill":"tdd","file_path":"/tmp/x_test.go","runner_output":"FAIL","verified":true,"model_used":"self","message":"ok"}}`
|
||||
claude := fakeClaudePath(t, envelope, 0)
|
||||
|
||||
ex := iexec.New(iexec.Config{
|
||||
ClaudeBinary: claude,
|
||||
SystemPrompt: "you are a supervisor",
|
||||
Timeout: 5 * time.Second,
|
||||
})
|
||||
|
||||
result, err := ex.Run(context.Background(), iexec.Request{
|
||||
SkillPrompt: "tdd rules",
|
||||
TaskPrompt: "run red phase",
|
||||
})
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, "pass", result.Status)
|
||||
assert.True(t, result.Verified)
|
||||
}
|
||||
|
||||
func TestExecutorReturnsErrorOnNonZeroExit(t *testing.T) {
|
||||
claude := fakeClaudePath(t, "", 1)
|
||||
|
||||
ex := iexec.New(iexec.Config{
|
||||
ClaudeBinary: claude,
|
||||
SystemPrompt: "you are a supervisor",
|
||||
Timeout: 5 * time.Second,
|
||||
})
|
||||
|
||||
_, err := ex.Run(context.Background(), iexec.Request{TaskPrompt: "fail"})
|
||||
assert.Error(t, err)
|
||||
}
|
||||
|
||||
func TestExecutorTimesOut(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
script := filepath.Join(dir, "claude")
|
||||
require.NoError(t, os.WriteFile(script, []byte("#!/bin/sh\nsleep 60\n"), 0755))
|
||||
|
||||
ex := iexec.New(iexec.Config{
|
||||
ClaudeBinary: script,
|
||||
SystemPrompt: "you are a supervisor",
|
||||
Timeout: 100 * time.Millisecond,
|
||||
})
|
||||
|
||||
_, err := ex.Run(context.Background(), iexec.Request{TaskPrompt: "slow"})
|
||||
assert.ErrorContains(t, err, "timeout")
|
||||
}
|
||||
|
||||
func TestExecutorPassesModelFlagForCloudModel(t *testing.T) {
|
||||
// The script captures its args to a temp file so we can assert --model was passed.
|
||||
argsFile := filepath.Join(t.TempDir(), "args.txt")
|
||||
envelope := `{"type":"result","subtype":"success","is_error":false,"structured_output":{"status":"pass","phase":"review","skill":"review","file_path":"","runner_output":"","verified":true,"model_used":"claude-sonnet-4-6","message":"ok"}}`
|
||||
|
||||
dir := t.TempDir()
|
||||
script := filepath.Join(dir, "claude")
|
||||
content := "#!/bin/sh\necho \"$@\" > " + argsFile + "\necho '" + envelope + "'\n"
|
||||
require.NoError(t, os.WriteFile(script, []byte(content), 0755))
|
||||
|
||||
ex := iexec.New(iexec.Config{
|
||||
ClaudeBinary: script,
|
||||
SystemPrompt: "sys",
|
||||
Timeout: 5 * time.Second,
|
||||
})
|
||||
|
||||
_, err := ex.Run(context.Background(), iexec.Request{
|
||||
SkillPrompt: "review rules",
|
||||
TaskPrompt: "do review",
|
||||
Model: "claude-sonnet-4-6",
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
argsData, err := os.ReadFile(argsFile)
|
||||
require.NoError(t, err)
|
||||
assert.Contains(t, string(argsData), "--model claude-sonnet-4-6")
|
||||
}
|
||||
|
||||
func TestExecutorSkipsModelFlagForLocalModel(t *testing.T) {
|
||||
argsFile := filepath.Join(t.TempDir(), "args.txt")
|
||||
envelope := `{"type":"result","subtype":"success","is_error":false,"structured_output":{"status":"pass","phase":"review","skill":"review","file_path":"","runner_output":"","verified":true,"model_used":"ollama/devstral","message":"ok"}}`
|
||||
|
||||
dir := t.TempDir()
|
||||
script := filepath.Join(dir, "claude")
|
||||
content := "#!/bin/sh\necho \"$@\" > " + argsFile + "\necho '" + envelope + "'\n"
|
||||
require.NoError(t, os.WriteFile(script, []byte(content), 0755))
|
||||
|
||||
ex := iexec.New(iexec.Config{
|
||||
ClaudeBinary: script,
|
||||
SystemPrompt: "sys",
|
||||
Timeout: 5 * time.Second,
|
||||
})
|
||||
|
||||
_, err := ex.Run(context.Background(), iexec.Request{
|
||||
SkillPrompt: "review rules",
|
||||
TaskPrompt: "do review",
|
||||
Model: "ollama/devstral",
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
argsData, err := os.ReadFile(argsFile)
|
||||
require.NoError(t, err)
|
||||
assert.NotContains(t, string(argsData), "--model")
|
||||
}
|
||||
@@ -9,9 +9,8 @@ import (
|
||||
"time"
|
||||
)
|
||||
|
||||
// LiteLLMExecutor calls a LiteLLM-compatible /v1/chat/completions endpoint.
|
||||
// Local models are expected to return a JSON object matching the Result schema
|
||||
// as their response content — no envelope.
|
||||
// LiteLLMExecutor calls a LiteLLM-compatible /v1/chat/completions endpoint
|
||||
// and returns the raw assistant message text.
|
||||
type LiteLLMExecutor struct {
|
||||
baseURL string
|
||||
apiKey string
|
||||
@@ -21,9 +20,12 @@ type LiteLLMExecutor struct {
|
||||
// NewLiteLLM creates a LiteLLMExecutor.
|
||||
// timeout applies to the full HTTP round-trip per call.
|
||||
func NewLiteLLM(baseURL, apiKey string, timeout time.Duration) *LiteLLMExecutor {
|
||||
if timeout == 0 {
|
||||
timeout = 120 * time.Second
|
||||
}
|
||||
return &LiteLLMExecutor{
|
||||
baseURL: baseURL,
|
||||
apiKey: apiKey,
|
||||
baseURL: baseURL,
|
||||
apiKey: apiKey,
|
||||
httpClient: &http.Client{Timeout: timeout},
|
||||
}
|
||||
}
|
||||
@@ -46,58 +48,50 @@ type litellmResponse struct {
|
||||
Choices []litellmChoice `json:"choices"`
|
||||
}
|
||||
|
||||
// Run dispatches req to the LiteLLM server and parses the Result from the
|
||||
// assistant message content. Returns an error on network failure, non-200
|
||||
// status, or unparseable/invalid JSON — all of which the Orchestrator treats
|
||||
// as automatic escalation triggers.
|
||||
func (e *LiteLLMExecutor) Run(ctx context.Context, req Request) (Result, error) {
|
||||
// Complete sends system+user messages to the given model and returns the raw
|
||||
// assistant text along with the round-trip duration in milliseconds.
|
||||
func (e *LiteLLMExecutor) Complete(ctx context.Context, model, system, user string) (string, int64, error) {
|
||||
body := litellmRequest{
|
||||
Model: req.Model,
|
||||
Model: model,
|
||||
Messages: []litellmMessage{
|
||||
{Role: "system", Content: req.SkillPrompt},
|
||||
{Role: "user", Content: req.TaskPrompt},
|
||||
{Role: "system", Content: system},
|
||||
{Role: "user", Content: user},
|
||||
},
|
||||
}
|
||||
|
||||
bodyBytes, err := json.Marshal(body)
|
||||
if err != nil {
|
||||
return Result{}, fmt.Errorf("litellm: marshal request: %w", err)
|
||||
return "", 0, fmt.Errorf("litellm: marshal request: %w", err)
|
||||
}
|
||||
|
||||
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, e.baseURL+"/v1/chat/completions", bytes.NewReader(bodyBytes))
|
||||
if err != nil {
|
||||
return Result{}, fmt.Errorf("litellm: create request: %w", err)
|
||||
return "", 0, fmt.Errorf("litellm: create request: %w", err)
|
||||
}
|
||||
httpReq.Header.Set("Content-Type", "application/json")
|
||||
if e.apiKey != "" {
|
||||
httpReq.Header.Set("Authorization", "Bearer "+e.apiKey)
|
||||
}
|
||||
|
||||
t0 := time.Now()
|
||||
resp, err := e.httpClient.Do(httpReq)
|
||||
if err != nil {
|
||||
return Result{}, fmt.Errorf("litellm: request failed: %w", err)
|
||||
return "", 0, fmt.Errorf("litellm: request failed: %w", err)
|
||||
}
|
||||
defer resp.Body.Close() //nolint:errcheck
|
||||
durationMs := time.Since(t0).Milliseconds()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return Result{}, fmt.Errorf("litellm: server returned status %d", resp.StatusCode)
|
||||
return "", 0, fmt.Errorf("litellm: server returned status %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
var chatResp litellmResponse
|
||||
if err := json.NewDecoder(resp.Body).Decode(&chatResp); err != nil {
|
||||
return Result{}, fmt.Errorf("litellm: decode response: %w", err)
|
||||
return "", 0, fmt.Errorf("litellm: decode response: %w", err)
|
||||
}
|
||||
if len(chatResp.Choices) == 0 {
|
||||
return Result{}, fmt.Errorf("litellm: no choices in response")
|
||||
return "", 0, fmt.Errorf("litellm: no choices in response")
|
||||
}
|
||||
|
||||
content := chatResp.Choices[0].Message.Content
|
||||
var result Result
|
||||
if err := json.Unmarshal([]byte(content), &result); err != nil {
|
||||
return Result{}, fmt.Errorf("litellm: parse result JSON: %w — content: %s", err, content)
|
||||
}
|
||||
if err := result.Validate(); err != nil {
|
||||
return Result{}, fmt.Errorf("litellm: invalid result: %w", err)
|
||||
}
|
||||
return result, nil
|
||||
return chatResp.Choices[0].Message.Content, durationMs, nil
|
||||
}
|
||||
|
||||
@@ -13,23 +13,11 @@ import (
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func validLiteLLMResult() iexec.Result {
|
||||
return iexec.Result{
|
||||
Status: "pass",
|
||||
Phase: "review",
|
||||
Skill: "review",
|
||||
ModelUsed: "ollama/devstral",
|
||||
Message: "looks good",
|
||||
}
|
||||
}
|
||||
|
||||
func chatResponseFor(t *testing.T, result iexec.Result) []byte {
|
||||
func chatResponse(t *testing.T, content string) []byte {
|
||||
t.Helper()
|
||||
content, err := json.Marshal(result)
|
||||
require.NoError(t, err)
|
||||
resp := map[string]any{
|
||||
"choices": []map[string]any{
|
||||
{"message": map[string]any{"role": "assistant", "content": string(content)}},
|
||||
{"message": map[string]any{"role": "assistant", "content": content}},
|
||||
},
|
||||
}
|
||||
data, err := json.Marshal(resp)
|
||||
@@ -37,25 +25,21 @@ func chatResponseFor(t *testing.T, result iexec.Result) []byte {
|
||||
return data
|
||||
}
|
||||
|
||||
func TestLiteLLMParsesValidResult(t *testing.T) {
|
||||
func TestLiteLLMReturnsText(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
assert.Equal(t, "/v1/chat/completions", r.URL.Path)
|
||||
assert.Equal(t, "application/json", r.Header.Get("Content-Type"))
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
_, _ = w.Write(chatResponseFor(t, validLiteLLMResult()))
|
||||
_, _ = w.Write(chatResponse(t, "here is my analysis"))
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
ex := iexec.NewLiteLLM(srv.URL, "", 5*time.Second)
|
||||
result, err := ex.Run(context.Background(), iexec.Request{
|
||||
SkillPrompt: "review rules",
|
||||
TaskPrompt: "review the code",
|
||||
Model: "ollama/devstral",
|
||||
})
|
||||
text, dur, err := ex.Complete(context.Background(), "ollama/devstral", "system prompt", "user prompt")
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, "pass", result.Status)
|
||||
assert.Equal(t, "review", result.Skill)
|
||||
assert.Equal(t, "here is my analysis", text)
|
||||
assert.GreaterOrEqual(t, dur, int64(0))
|
||||
}
|
||||
|
||||
func TestLiteLLMSendsAuthHeader(t *testing.T) {
|
||||
@@ -63,12 +47,12 @@ func TestLiteLLMSendsAuthHeader(t *testing.T) {
|
||||
assert.Equal(t, "Bearer secret", r.Header.Get("Authorization"))
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
_, _ = w.Write(chatResponseFor(t, validLiteLLMResult()))
|
||||
_, _ = w.Write(chatResponse(t, "ok"))
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
ex := iexec.NewLiteLLM(srv.URL, "secret", 5*time.Second)
|
||||
_, err := ex.Run(context.Background(), iexec.Request{Model: "x", TaskPrompt: "t", SkillPrompt: "s"})
|
||||
_, _, err := ex.Complete(context.Background(), "model", "sys", "user")
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
@@ -79,34 +63,28 @@ func TestLiteLLMErrorOnNonOKStatus(t *testing.T) {
|
||||
defer srv.Close()
|
||||
|
||||
ex := iexec.NewLiteLLM(srv.URL, "", 5*time.Second)
|
||||
_, err := ex.Run(context.Background(), iexec.Request{Model: "x", TaskPrompt: "t"})
|
||||
_, _, err := ex.Complete(context.Background(), "model", "sys", "user")
|
||||
assert.ErrorContains(t, err, "503")
|
||||
}
|
||||
|
||||
func TestLiteLLMErrorOnUnparsableJSON(t *testing.T) {
|
||||
func TestLiteLLMErrorOnEmptyChoices(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
resp := map[string]any{
|
||||
"choices": []map[string]any{
|
||||
{"message": map[string]any{"role": "assistant", "content": "not json at all"}},
|
||||
},
|
||||
}
|
||||
data, _ := json.Marshal(resp)
|
||||
_, _ = w.Write(data)
|
||||
_, _ = w.Write([]byte(`{"choices":[]}`))
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
ex := iexec.NewLiteLLM(srv.URL, "", 5*time.Second)
|
||||
_, err := ex.Run(context.Background(), iexec.Request{Model: "x", TaskPrompt: "t"})
|
||||
assert.Error(t, err)
|
||||
_, _, err := ex.Complete(context.Background(), "model", "sys", "user")
|
||||
assert.ErrorContains(t, err, "no choices")
|
||||
}
|
||||
|
||||
func TestLiteLLMRespectsContextCancellation(t *testing.T) {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
cancel() // Cancel immediately
|
||||
cancel()
|
||||
|
||||
ex := iexec.NewLiteLLM("http://invalid.example.com", "", 1*time.Second)
|
||||
_, err := ex.Run(ctx, iexec.Request{Model: "x", TaskPrompt: "t"})
|
||||
_, _, err := ex.Complete(ctx, "model", "sys", "user")
|
||||
assert.Error(t, err)
|
||||
}
|
||||
|
||||
@@ -1,197 +0,0 @@
|
||||
package exec
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// ChainEntry describes a single tier of an escalation chain.
type ChainEntry struct {
	// Model is the model name, e.g. "ollama/phi4" or "claude-sonnet-4-6".
	Model string
	// Tier is one of "local", "subagent" or "managed".
	Tier string
	// IsCloud is true for claude-* models; such tiers skip the verifier call.
	IsCloud bool
}

// EntryFor derives a ChainEntry from a model name: names starting with
// "claude-" become cloud entries in the "subagent" tier, everything else is a
// non-cloud entry in the "local" tier.
func EntryFor(model string) ChainEntry {
	entry := ChainEntry{Model: model, Tier: "local"}
	if strings.HasPrefix(model, "claude-") {
		entry.Tier = "subagent"
		entry.IsCloud = true
	}
	return entry
}
|
||||
|
||||
// AttemptRecord is the session-log entry for one tier attempt: which model
// and tier ran, how long it took, whether the model was already warm, and the
// outcome.
type AttemptRecord struct {
	Model      string
	Tier       string
	DurationMs int64
	WarmStart  bool
	// Verdict is one of "accept", "escalate" or "error".
	Verdict  string
	Feedback string
}
|
||||
|
||||
// VerifierFn is the interface the orchestrator uses to verify local output.
|
||||
type VerifierFn interface {
|
||||
Verify(ctx context.Context, skillPrompt, taskPrompt string, output Result) (Verdict, error)
|
||||
}
|
||||
|
||||
// ExecutorRunFn is the signature of Executor.Run and LiteLLMExecutor.Run.
|
||||
type ExecutorRunFn func(ctx context.Context, req Request) (Result, error)
|
||||
|
||||
// Orchestrator walks an escalation chain, delegating generation and verification.
|
||||
// It implements the ExecutorFn shape expected by skill handlers.
|
||||
type Orchestrator struct {
|
||||
chain []ChainEntry
|
||||
localRun ExecutorRunFn // for local (non-cloud) tiers; may be nil
|
||||
cloudRun ExecutorRunFn // for cloud tiers; may be nil
|
||||
verifier VerifierFn
|
||||
llamaSwapURL string
|
||||
attempts *[]AttemptRecord
|
||||
}
|
||||
|
||||
// NewOrchestrator creates an Orchestrator.
|
||||
// attempts is a pointer to a slice that will be appended to on each tier attempt.
|
||||
// Pass nil for localRun or cloudRun if no tiers of that type exist in the chain.
|
||||
func NewOrchestrator(
|
||||
chain []ChainEntry,
|
||||
localRun ExecutorRunFn,
|
||||
cloudRun ExecutorRunFn,
|
||||
verifier VerifierFn,
|
||||
llamaSwapURL string,
|
||||
attempts *[]AttemptRecord,
|
||||
) *Orchestrator {
|
||||
return &Orchestrator{
|
||||
chain: chain,
|
||||
localRun: localRun,
|
||||
cloudRun: cloudRun,
|
||||
verifier: verifier,
|
||||
llamaSwapURL: llamaSwapURL,
|
||||
attempts: attempts,
|
||||
}
|
||||
}
|
||||
|
||||
// Run walks the escalation chain and returns the first accepted result.
|
||||
// Satisfies the ExecutorFn signature: func(context.Context, Request) (Result, error).
|
||||
func (o *Orchestrator) Run(ctx context.Context, req Request) (Result, error) {
|
||||
taskPrompt := req.TaskPrompt
|
||||
|
||||
for _, entry := range o.chain {
|
||||
warm := o.probeWarm(entry.Model)
|
||||
start := time.Now()
|
||||
|
||||
tierReq := req
|
||||
tierReq.Model = entry.Model
|
||||
tierReq.TaskPrompt = taskPrompt
|
||||
|
||||
if entry.IsCloud {
|
||||
result, genErr := o.cloudRun(ctx, tierReq)
|
||||
dur := time.Since(start).Milliseconds()
|
||||
verdict := "accept"
|
||||
if genErr != nil {
|
||||
verdict = "error"
|
||||
}
|
||||
o.appendAttempt(AttemptRecord{
|
||||
Model: entry.Model,
|
||||
Tier: entry.Tier,
|
||||
DurationMs: dur,
|
||||
WarmStart: warm,
|
||||
Verdict: verdict,
|
||||
})
|
||||
if genErr == nil {
|
||||
return result, nil
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
// Local tier.
|
||||
result, genErr := o.localRun(ctx, tierReq)
|
||||
dur := time.Since(start).Milliseconds()
|
||||
|
||||
if genErr != nil {
|
||||
o.appendAttempt(AttemptRecord{
|
||||
Model: entry.Model,
|
||||
Tier: entry.Tier,
|
||||
DurationMs: dur,
|
||||
WarmStart: warm,
|
||||
Verdict: "error",
|
||||
Feedback: genErr.Error(),
|
||||
})
|
||||
continue
|
||||
}
|
||||
|
||||
verdict, verErr := o.verifier.Verify(ctx, req.SkillPrompt, taskPrompt, result)
|
||||
if verErr != nil {
|
||||
// Treat verifier failure as escalate (safe default).
|
||||
o.appendAttempt(AttemptRecord{
|
||||
Model: entry.Model,
|
||||
Tier: entry.Tier,
|
||||
DurationMs: dur,
|
||||
WarmStart: warm,
|
||||
Verdict: "escalate",
|
||||
Feedback: "verifier error: " + verErr.Error(),
|
||||
})
|
||||
continue
|
||||
}
|
||||
|
||||
if verdict.Accept {
|
||||
o.appendAttempt(AttemptRecord{
|
||||
Model: entry.Model,
|
||||
Tier: entry.Tier,
|
||||
DurationMs: dur,
|
||||
WarmStart: warm,
|
||||
Verdict: "accept",
|
||||
})
|
||||
return result, nil
|
||||
}
|
||||
|
||||
o.appendAttempt(AttemptRecord{
|
||||
Model: entry.Model,
|
||||
Tier: entry.Tier,
|
||||
DurationMs: dur,
|
||||
WarmStart: warm,
|
||||
Verdict: "escalate",
|
||||
Feedback: verdict.Feedback,
|
||||
})
|
||||
// Inject verifier feedback into the next tier's task prompt.
|
||||
taskPrompt = taskPrompt + "\n\nPrior attempt feedback: " + verdict.Feedback
|
||||
}
|
||||
|
||||
return Result{}, fmt.Errorf("all tiers exhausted after %d attempt(s)", len(o.chain))
|
||||
}
|
||||
|
||||
func (o *Orchestrator) appendAttempt(rec AttemptRecord) {
|
||||
if o.attempts != nil {
|
||||
*o.attempts = append(*o.attempts, rec)
|
||||
}
|
||||
}
|
||||
|
||||
// probeWarm checks whether the model is currently loaded in llama-swap.
|
||||
// Returns false on any error or if llamaSwapURL is empty.
|
||||
func (o *Orchestrator) probeWarm(model string) bool {
|
||||
if o.llamaSwapURL == "" {
|
||||
return false
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond)
|
||||
defer cancel()
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, o.llamaSwapURL+"/v1/models", nil)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
resp, err := http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
defer resp.Body.Close() //nolint:errcheck
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
return strings.Contains(string(body), model)
|
||||
}
|
||||
@@ -1,151 +0,0 @@
|
||||
package exec_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"testing"
|
||||
|
||||
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
// stubRunFn returns preset results sequentially.
|
||||
type stubRunFn struct {
|
||||
calls []stubCall
|
||||
callIdx int
|
||||
}
|
||||
|
||||
type stubCall struct {
|
||||
result iexec.Result
|
||||
err error
|
||||
}
|
||||
|
||||
func (s *stubRunFn) Run(_ context.Context, _ iexec.Request) (iexec.Result, error) {
|
||||
if s.callIdx >= len(s.calls) {
|
||||
return iexec.Result{}, errors.New("unexpected call")
|
||||
}
|
||||
c := s.calls[s.callIdx]
|
||||
s.callIdx++
|
||||
return c.result, c.err
|
||||
}
|
||||
|
||||
// stubVerifier returns preset verdicts sequentially.
|
||||
type stubVerifier struct {
|
||||
verdicts []iexec.Verdict
|
||||
idx int
|
||||
}
|
||||
|
||||
func (s *stubVerifier) Verify(_ context.Context, _, _ string, _ iexec.Result) (iexec.Verdict, error) {
|
||||
if s.idx >= len(s.verdicts) {
|
||||
return iexec.Verdict{}, errors.New("unexpected verify call")
|
||||
}
|
||||
v := s.verdicts[s.idx]
|
||||
s.idx++
|
||||
return v, nil
|
||||
}
|
||||
|
||||
func okResult(skill string) iexec.Result {
|
||||
return iexec.Result{Status: "pass", Phase: "review", Skill: skill, Message: "ok", ModelUsed: "m"}
|
||||
}
|
||||
|
||||
func TestOrchestratorSingleLocalAccept(t *testing.T) {
|
||||
local := &stubRunFn{calls: []stubCall{{result: okResult("review")}}}
|
||||
verifier := &stubVerifier{verdicts: []iexec.Verdict{{Accept: true}}}
|
||||
|
||||
var attempts []iexec.AttemptRecord
|
||||
orch := iexec.NewOrchestrator(
|
||||
[]iexec.ChainEntry{{Model: "ollama/devstral", Tier: "local", IsCloud: false}},
|
||||
local.Run, nil, verifier, "", &attempts,
|
||||
)
|
||||
|
||||
result, err := orch.Run(context.Background(), iexec.Request{TaskPrompt: "review"})
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, "pass", result.Status)
|
||||
require.Len(t, attempts, 1)
|
||||
assert.Equal(t, "local", attempts[0].Tier)
|
||||
assert.Equal(t, "accept", attempts[0].Verdict)
|
||||
}
|
||||
|
||||
func TestOrchestratorEscalatesOnVerifierReject(t *testing.T) {
|
||||
local := &stubRunFn{calls: []stubCall{
|
||||
{result: iexec.Result{Status: "fail", Phase: "review", Skill: "review", Message: "weak"}},
|
||||
{result: okResult("review")},
|
||||
}}
|
||||
verifier := &stubVerifier{verdicts: []iexec.Verdict{
|
||||
{Accept: false, Feedback: "missing line refs"},
|
||||
{Accept: true},
|
||||
}}
|
||||
|
||||
var attempts []iexec.AttemptRecord
|
||||
orch := iexec.NewOrchestrator(
|
||||
[]iexec.ChainEntry{
|
||||
{Model: "ollama/devstral", Tier: "local", IsCloud: false},
|
||||
{Model: "ollama/gemma4", Tier: "local", IsCloud: false},
|
||||
},
|
||||
local.Run, nil, verifier, "", &attempts,
|
||||
)
|
||||
|
||||
result, err := orch.Run(context.Background(), iexec.Request{TaskPrompt: "review"})
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, "pass", result.Status)
|
||||
require.Len(t, attempts, 2)
|
||||
assert.Equal(t, "escalate", attempts[0].Verdict)
|
||||
assert.Equal(t, "missing line refs", attempts[0].Feedback)
|
||||
assert.Equal(t, "accept", attempts[1].Verdict)
|
||||
}
|
||||
|
||||
func TestOrchestratorEscalatesOnLocalError(t *testing.T) {
|
||||
local := &stubRunFn{calls: []stubCall{
|
||||
{err: errors.New("network failure")},
|
||||
{result: okResult("review")},
|
||||
}}
|
||||
verifier := &stubVerifier{verdicts: []iexec.Verdict{{Accept: true}}}
|
||||
|
||||
var attempts []iexec.AttemptRecord
|
||||
orch := iexec.NewOrchestrator(
|
||||
[]iexec.ChainEntry{
|
||||
{Model: "ollama/devstral", Tier: "local", IsCloud: false},
|
||||
{Model: "ollama/gemma4", Tier: "local", IsCloud: false},
|
||||
},
|
||||
local.Run, nil, verifier, "", &attempts,
|
||||
)
|
||||
|
||||
_, err := orch.Run(context.Background(), iexec.Request{TaskPrompt: "review"})
|
||||
require.NoError(t, err)
|
||||
require.Len(t, attempts, 2)
|
||||
assert.Equal(t, "error", attempts[0].Verdict)
|
||||
assert.Equal(t, "accept", attempts[1].Verdict)
|
||||
}
|
||||
|
||||
func TestOrchestratorCloudTierSelfCertifies(t *testing.T) {
|
||||
cloud := &stubRunFn{calls: []stubCall{{result: okResult("review")}}}
|
||||
verifier := &stubVerifier{} // no verdicts — must not be called
|
||||
|
||||
var attempts []iexec.AttemptRecord
|
||||
orch := iexec.NewOrchestrator(
|
||||
[]iexec.ChainEntry{{Model: "claude-sonnet-4-6", Tier: "subagent", IsCloud: true}},
|
||||
nil, cloud.Run, verifier, "", &attempts,
|
||||
)
|
||||
|
||||
result, err := orch.Run(context.Background(), iexec.Request{TaskPrompt: "review"})
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, "pass", result.Status)
|
||||
require.Len(t, attempts, 1)
|
||||
assert.Equal(t, "subagent", attempts[0].Tier)
|
||||
assert.Equal(t, "accept", attempts[0].Verdict)
|
||||
assert.Equal(t, 0, verifier.idx) // verifier never called
|
||||
}
|
||||
|
||||
func TestOrchestratorAllTiersExhausted(t *testing.T) {
|
||||
local := &stubRunFn{calls: []stubCall{{err: errors.New("unavailable")}}}
|
||||
|
||||
var attempts []iexec.AttemptRecord
|
||||
orch := iexec.NewOrchestrator(
|
||||
[]iexec.ChainEntry{{Model: "ollama/devstral", Tier: "local", IsCloud: false}},
|
||||
local.Run, nil, &stubVerifier{}, "", &attempts,
|
||||
)
|
||||
|
||||
_, err := orch.Run(context.Background(), iexec.Request{TaskPrompt: "review"})
|
||||
assert.ErrorContains(t, err, "all tiers exhausted")
|
||||
}
|
||||
@@ -1,66 +0,0 @@
|
||||
package exec
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Result is the structured JSON output from every supervisor invocation.
|
||||
// The JSON schema constant is passed to claude via --json-schema so Claude
|
||||
// validates its own output before returning.
|
||||
type Result struct {
|
||||
Status string `json:"status"` // pass | fail | error
|
||||
Phase string `json:"phase"` // red | green | refactor | retrospective | review | debug | spec | trainer
|
||||
Skill string `json:"skill"` // tdd | review | ...
|
||||
FilePath string `json:"file_path"` // absolute path to generated file
|
||||
RunnerOutput string `json:"runner_output"` // raw stdout+stderr from test runner
|
||||
Verified bool `json:"verified"` // based on exit code, never self-report
|
||||
ModelUsed string `json:"model_used"` // model name or "self"
|
||||
Message string `json:"message"` // one sentence summary
|
||||
Attempts []AttemptRecord `json:"attempts,omitempty"` // populated by orchestrator, not Claude
|
||||
}
|
||||
|
||||
var (
	// validStatuses is the set of Status values that Result.Validate accepts.
	validStatuses = map[string]bool{
		"pass":  true,
		"fail":  true,
		"error": true,
	}

	// validPhases is the set of Phase values that Result.Validate accepts.
	validPhases = map[string]bool{
		"red":           true,
		"green":         true,
		"refactor":      true,
		"retrospective": true,
		"review":        true,
		"debug":         true,
		"spec":          true,
		"trainer":       true,
	}
)
|
||||
|
||||
func (r Result) Validate() error {
|
||||
var errs []string
|
||||
if !validStatuses[r.Status] {
|
||||
errs = append(errs, "status must be pass|fail|error, got: "+r.Status)
|
||||
}
|
||||
if !validPhases[r.Phase] {
|
||||
errs = append(errs, "phase must be one of red|green|refactor|retrospective|review|debug|spec|trainer, got: "+r.Phase)
|
||||
}
|
||||
if r.Skill == "" {
|
||||
errs = append(errs, "skill is required")
|
||||
}
|
||||
if len(errs) > 0 {
|
||||
return errors.New(strings.Join(errs, "; "))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Schema is passed to claude --json-schema to enforce structured output.
//
// The status and phase enums mirror validStatuses and validPhases, so a value
// that satisfies the schema also satisfies Result.Validate for those fields.
// The attempts field is not part of the schema; Result documents it as being
// populated by the orchestrator rather than by Claude.
const Schema = `{
  "type": "object",
  "required": ["status","phase","skill","file_path","runner_output","verified","model_used","message"],
  "properties": {
    "status":        {"type": "string", "enum": ["pass","fail","error"]},
    "phase":         {"type": "string", "enum": ["red","green","refactor","retrospective","review","debug","spec","trainer"]},
    "skill":         {"type": "string"},
    "file_path":     {"type": "string"},
    "runner_output": {"type": "string"},
    "verified":      {"type": "boolean"},
    "model_used":    {"type": "string"},
    "message":       {"type": "string"}
  }
}`
|
||||
@@ -1,79 +0,0 @@
|
||||
package exec_test
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"testing"
|
||||
|
||||
"github.com/mathiasbq/supervisor/internal/exec"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestResultParsesValidJSON(t *testing.T) {
|
||||
raw := `{
|
||||
"status": "pass",
|
||||
"phase": "red",
|
||||
"skill": "tdd",
|
||||
"file_path": "/tmp/foo_test.go",
|
||||
"runner_output": "--- FAIL: TestFoo",
|
||||
"verified": true,
|
||||
"model_used": "self",
|
||||
"message": "test fails as expected"
|
||||
}`
|
||||
var r exec.Result
|
||||
require.NoError(t, json.Unmarshal([]byte(raw), &r))
|
||||
assert.Equal(t, "pass", r.Status)
|
||||
assert.Equal(t, "red", r.Phase)
|
||||
assert.True(t, r.Verified)
|
||||
}
|
||||
|
||||
func TestResultValidation(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
result exec.Result
|
||||
wantErr bool
|
||||
}{
|
||||
{
|
||||
name: "valid pass result",
|
||||
result: exec.Result{
|
||||
Status: "pass", Phase: "red", Skill: "tdd",
|
||||
FilePath: "/tmp/x_test.go", RunnerOutput: "FAIL",
|
||||
Verified: true, ModelUsed: "self", Message: "ok",
|
||||
},
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "empty status",
|
||||
result: exec.Result{Phase: "red", Skill: "tdd"},
|
||||
wantErr: true,
|
||||
},
|
||||
{
|
||||
name: "invalid status",
|
||||
result: exec.Result{Status: "unknown", Phase: "red", Skill: "tdd"},
|
||||
wantErr: true,
|
||||
},
|
||||
{
|
||||
name: "invalid phase",
|
||||
result: exec.Result{Status: "pass", Phase: "bad", Skill: "tdd"},
|
||||
wantErr: true,
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
err := tt.result.Validate()
|
||||
if tt.wantErr {
|
||||
assert.Error(t, err)
|
||||
} else {
|
||||
assert.NoError(t, err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidateAcceptsAllPhases(t *testing.T) {
|
||||
phases := []string{"red", "green", "refactor", "retrospective", "review", "debug", "spec", "trainer"}
|
||||
for _, phase := range phases {
|
||||
r := exec.Result{Status: "pass", Phase: phase, Skill: "test", ModelUsed: "self", Message: "ok"}
|
||||
assert.NoError(t, r.Validate(), "phase %q should be valid", phase)
|
||||
}
|
||||
}
|
||||
@@ -1,99 +0,0 @@
|
||||
package exec
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Verdict is the output of a Claude verification call.
type Verdict struct {
	// Accept reports whether the verifier accepted the output.
	Accept bool `json:"accept"`

	// Feedback is empty when Accept is true.
	Feedback string `json:"feedback"`
}
|
||||
|
||||
// Verifier runs a focused Claude call to judge local model output.
type Verifier struct {
	claudeBinary string        // executable that Verify spawns
	model        string        // passed as --model by Verify; empty omits the flag
	timeout      time.Duration // deadline Verify applies to each call
}

// NewVerifier creates a Verifier that calls claude with the given binary path and model.
// An empty claudeBinary defaults to "claude". A zero or negative timeout
// defaults to 30s; a negative value would otherwise hand Verify a context
// that has already expired before the subprocess starts.
func NewVerifier(claudeBinary, model string, timeout time.Duration) *Verifier {
	if claudeBinary == "" {
		claudeBinary = "claude"
	}
	if timeout <= 0 {
		timeout = 30 * time.Second
	}
	return &Verifier{
		claudeBinary: claudeBinary,
		model:        model,
		timeout:      timeout,
	}
}
|
||||
|
||||
// Verify asks Claude whether output satisfies the skill discipline's iron laws.
|
||||
// Returns Verdict{Accept: true} to accept or Verdict{Accept: false, Feedback: "..."}
|
||||
// to escalate. Returns an error on subprocess failure or unparseable response.
|
||||
func (v *Verifier) Verify(ctx context.Context, skillPrompt, taskPrompt string, output Result) (Verdict, error) {
|
||||
ctx, cancel := context.WithTimeout(ctx, v.timeout)
|
||||
defer cancel()
|
||||
|
||||
outputJSON, err := json.Marshal(output)
|
||||
if err != nil {
|
||||
return Verdict{}, fmt.Errorf("verifier: marshal output: %w", err)
|
||||
}
|
||||
|
||||
prompt := fmt.Sprintf(`You are a quality verifier for an AI supervisor system.
|
||||
|
||||
Given the skill discipline, the original task, and the generated output, decide whether the output satisfies the discipline's iron laws and output contract.
|
||||
|
||||
Reply with JSON only — no other text:
|
||||
{"accept": true, "feedback": ""}
|
||||
or
|
||||
{"accept": false, "feedback": "<one sentence reason>"}
|
||||
|
||||
## Skill discipline
|
||||
%s
|
||||
|
||||
## Original task
|
||||
%s
|
||||
|
||||
## Generated output
|
||||
%s`, skillPrompt, taskPrompt, string(outputJSON))
|
||||
|
||||
args := []string{
|
||||
"--print",
|
||||
"--permission-mode", "bypassPermissions",
|
||||
}
|
||||
if v.model != "" {
|
||||
args = append(args, "--model", v.model)
|
||||
}
|
||||
args = append(args, prompt)
|
||||
|
||||
cmd := exec.CommandContext(ctx, v.claudeBinary, args...)
|
||||
cmd.Env = os.Environ()
|
||||
var stdout, stderr bytes.Buffer
|
||||
cmd.Stdout = &stdout
|
||||
cmd.Stderr = &stderr
|
||||
|
||||
if err := cmd.Run(); err != nil {
|
||||
if ctx.Err() != nil {
|
||||
return Verdict{}, fmt.Errorf("verifier: timeout after %s", v.timeout)
|
||||
}
|
||||
return Verdict{}, fmt.Errorf("verifier: claude exited with error: %w — stderr: %s", err, stderr.String())
|
||||
}
|
||||
|
||||
var verdict Verdict
|
||||
if err := json.Unmarshal(bytes.TrimSpace(stdout.Bytes()), &verdict); err != nil {
|
||||
return Verdict{}, fmt.Errorf("verifier: parse verdict JSON: %w — raw: %s", err, stdout.String())
|
||||
}
|
||||
return verdict, nil
|
||||
}
|
||||
@@ -1,74 +0,0 @@
|
||||
package exec_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func fakeVerifierClaude(t *testing.T, verdict iexec.Verdict) string {
|
||||
t.Helper()
|
||||
data, err := json.Marshal(verdict)
|
||||
require.NoError(t, err)
|
||||
dir := t.TempDir()
|
||||
script := filepath.Join(dir, "claude")
|
||||
content := fmt.Sprintf("#!/bin/sh\necho '%s'\n", string(data))
|
||||
require.NoError(t, os.WriteFile(script, []byte(content), 0755))
|
||||
return script
|
||||
}
|
||||
|
||||
func TestVerifierAccepts(t *testing.T) {
|
||||
claude := fakeVerifierClaude(t, iexec.Verdict{Accept: true, Feedback: ""})
|
||||
v := iexec.NewVerifier(claude, "claude-sonnet-4-6", 5*time.Second)
|
||||
|
||||
verdict, err := v.Verify(context.Background(), "skill rules", "do the task", iexec.Result{
|
||||
Status: "pass", Phase: "review", Skill: "review", Message: "ok",
|
||||
})
|
||||
require.NoError(t, err)
|
||||
assert.True(t, verdict.Accept)
|
||||
assert.Empty(t, verdict.Feedback)
|
||||
}
|
||||
|
||||
func TestVerifierEscalates(t *testing.T) {
|
||||
claude := fakeVerifierClaude(t, iexec.Verdict{Accept: false, Feedback: "missing line references"})
|
||||
v := iexec.NewVerifier(claude, "claude-sonnet-4-6", 5*time.Second)
|
||||
|
||||
verdict, err := v.Verify(context.Background(), "skill rules", "do the task", iexec.Result{
|
||||
Status: "pass", Phase: "review", Skill: "review", Message: "incomplete",
|
||||
})
|
||||
require.NoError(t, err)
|
||||
assert.False(t, verdict.Accept)
|
||||
assert.Equal(t, "missing line references", verdict.Feedback)
|
||||
}
|
||||
|
||||
func TestVerifierErrorOnUnparsableOutput(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
script := filepath.Join(dir, "claude")
|
||||
require.NoError(t, os.WriteFile(script, []byte("#!/bin/sh\necho 'not json'\n"), 0755))
|
||||
|
||||
v := iexec.NewVerifier(script, "claude-sonnet-4-6", 5*time.Second)
|
||||
_, err := v.Verify(context.Background(), "rules", "task", iexec.Result{
|
||||
Status: "pass", Phase: "review", Skill: "review", Message: "ok",
|
||||
})
|
||||
assert.Error(t, err)
|
||||
}
|
||||
|
||||
func TestVerifierErrorOnNonZeroExit(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
script := filepath.Join(dir, "claude")
|
||||
require.NoError(t, os.WriteFile(script, []byte("#!/bin/sh\nexit 1\n"), 0755))
|
||||
|
||||
v := iexec.NewVerifier(script, "claude-sonnet-4-6", 5*time.Second)
|
||||
_, err := v.Verify(context.Background(), "rules", "task", iexec.Result{
|
||||
Status: "pass", Phase: "review", Skill: "review", Message: "ok",
|
||||
})
|
||||
assert.Error(t, err)
|
||||
}
|
||||
Reference in New Issue
Block a user