diff --git a/cmd/supervisor/main.go b/cmd/supervisor/main.go index a57494c..14672d0 100644 --- a/cmd/supervisor/main.go +++ b/cmd/supervisor/main.go @@ -13,6 +13,10 @@ import ( "github.com/mathiasbq/supervisor/internal/skills/brain" "github.com/mathiasbq/supervisor/internal/skills/org" "github.com/mathiasbq/supervisor/internal/skills/retrospective" + skilldebug "github.com/mathiasbq/supervisor/internal/skills/debug" + "github.com/mathiasbq/supervisor/internal/skills/review" + "github.com/mathiasbq/supervisor/internal/skills/spec" + "github.com/mathiasbq/supervisor/internal/skills/trainer" "github.com/mathiasbq/supervisor/internal/skills/sessionlog" "github.com/mathiasbq/supervisor/internal/skills/tdd" "github.com/mathiasbq/supervisor/internal/tier" @@ -51,6 +55,35 @@ func main() { os.Exit(1) } + reviewPrompt, err := os.ReadFile(cfg.ConfigDir + "/review.md") + if err != nil { + logger.Error("read review.md", "path", cfg.ConfigDir+"/review.md", "err", err) + os.Exit(1) + } + + debugPrompt, err := os.ReadFile(cfg.ConfigDir + "/debug.md") + if err != nil { + logger.Error("read debug.md", "path", cfg.ConfigDir+"/debug.md", "err", err) + os.Exit(1) + } + + specPrompt, err := os.ReadFile(cfg.ConfigDir + "/spec.md") + if err != nil { + logger.Error("read spec.md", "path", cfg.ConfigDir+"/spec.md", "err", err) + os.Exit(1) + } + + trainerReaderPrompt, err := os.ReadFile(cfg.ConfigDir + "/trainer-reader.md") + if err != nil { + logger.Error("read trainer-reader.md", "path", cfg.ConfigDir+"/trainer-reader.md", "err", err) + os.Exit(1) + } + trainerWriterPrompt, err := os.ReadFile(cfg.ConfigDir + "/trainer-writer.md") + if err != nil { + logger.Error("read trainer-writer.md", "path", cfg.ConfigDir+"/trainer-writer.md", "err", err) + os.Exit(1) + } + executor := iexec.New(iexec.Config{ SystemPrompt: string(systemPrompt), LiteLLMBaseURL: cfg.LiteLLMBaseURL, @@ -67,6 +100,7 @@ func main() { SkillPrompt: string(tddPrompt), DefaultModel: models.Resolve("tdd", ""), ExecutorFn: 
executor.Run, + SessionsDir: cfg.SessionsDir, })) reg.Register(brain.New(brain.Config{ IngestBaseURL: cfg.IngestBaseURL, @@ -83,6 +117,32 @@ func main() { SessionsDir: cfg.SessionsDir, ExecutorFn: executor.Run, })) + reg.Register(review.New(review.Config{ + SkillPrompt: string(reviewPrompt), + DefaultModel: models.Resolve("review", ""), + ExecutorFn: executor.Run, + SessionsDir: cfg.SessionsDir, + })) + reg.Register(skilldebug.New(skilldebug.Config{ + SkillPrompt: string(debugPrompt), + DefaultModel: models.Resolve("debug", ""), + ExecutorFn: executor.Run, + SessionsDir: cfg.SessionsDir, + })) + reg.Register(spec.New(spec.Config{ + SkillPrompt: string(specPrompt), + DefaultModel: models.Resolve("spec", ""), + ExecutorFn: executor.Run, + SessionsDir: cfg.SessionsDir, + })) + reg.Register(trainer.New(trainer.Config{ + ReaderPrompt: string(trainerReaderPrompt), + WriterPrompt: string(trainerWriterPrompt), + DefaultModel: models.Resolve("trainer", ""), + ExecutorFn: executor.Run, + SessionsDir: cfg.SessionsDir, + BrainDir: cfg.BrainDir, + })) srv := mcp.NewServer(reg) mux := http.NewServeMux() diff --git a/config/models.yaml b/config/models.yaml index f26fb9b..bc612b5 100644 --- a/config/models.yaml +++ b/config/models.yaml @@ -9,3 +9,5 @@ skills: review: ollama/devstral-tuned debug: ollama/deepseek-r1-tuned retrospective: ollama/qwen3-coder-30b-tuned + spec: ollama/qwen3-coder-30b-tuned + trainer: ollama/qwen3-coder-30b-tuned diff --git a/config/supervisor/debug.md b/config/supervisor/debug.md new file mode 100644 index 0000000..c800dc5 --- /dev/null +++ b/config/supervisor/debug.md @@ -0,0 +1,31 @@ +# Debug Discipline + +You are a systematic debugger. Form hypotheses before suggesting fixes. + +## Iron laws +1. Never suggest "try X and see what happens" — every hypothesis must have a specific expected outcome if correct +2. Generate exactly 3-5 hypotheses, ordered by likelihood (most likely first) +3. 
Never fix the bug — diagnose only; the caller decides what to do with the hypotheses + +## Output contract +Return JSON result with: +- `status`: "pass" (hypotheses generated) or "error" (error too ambiguous to analyse) +- `phase`: "debug" +- `skill`: "debug" +- `file_path`: the most relevant file to the error (read it) +- `runner_output`: your hypotheses, formatted as: + ``` + HYPOTHESIS 1 (likelihood: high): [mechanism] + VERIFY: [command] → expected if correct: [output] + + HYPOTHESIS 2 (likelihood: medium): [mechanism] + VERIFY: [command] → expected if correct: [output] + ``` +- `verified`: false — verification is the caller's job +- `message`: "N hypotheses for: [error summary]" + +## Rules +1. Read the error and any context files provided before forming hypotheses +2. Identify the failure mode first — what actually went wrong, not just what the error says +3. For each hypothesis: name the mechanism, explain why it would produce this exact error, give a concrete verification command with expected output +4. If the error is clearly a typo or trivial mistake, still form 3 hypotheses — surface the most likely cause as #1 diff --git a/config/supervisor/review.md b/config/supervisor/review.md new file mode 100644 index 0000000..453607c --- /dev/null +++ b/config/supervisor/review.md @@ -0,0 +1,30 @@ +# Code Review Discipline + +You are a disciplined code reviewer. Read files carefully before commenting. + +## Iron laws +1. Never approve security vulnerabilities: command injection, SQL injection, credential exposure, path traversal, unchecked input at system boundaries +2. Never approve silently swallowed errors — `err != nil` without wrapping or handling is always wrong +3. 
Never approve missing validation at system boundaries (user input, external APIs, file reads) + +## Output contract +Return JSON result with: +- `status`: "pass" if no blocking issues; "fail" if any iron law is violated +- `phase`: "review" +- `skill`: "review" +- `file_path`: first file reviewed +- `runner_output`: full review formatted as: + ``` + CRITICAL: [issue] at [file]:[line] + WARNING: [issue] at [file]:[line] + SUGGESTION: [issue] at [file]:[line] + ``` +- `verified`: true if you read all specified files; false if any were missing or unreadable +- `message`: "N critical, M warnings, K suggestions" or "clean: [iron law checks run]" + +## Rules +1. Read every file listed before writing feedback +2. Check iron laws first — any violation is CRITICAL and sets status to "fail" +3. Then check: correctness, test coverage for new code, Go style conventions +4. Never rubber-stamp — if nothing is wrong, explain specifically which iron law checks you ran and why they passed +5. Line references are required for every finding — "roughly around the middle" is not acceptable diff --git a/config/supervisor/spec.md b/config/supervisor/spec.md new file mode 100644 index 0000000..01ae148 --- /dev/null +++ b/config/supervisor/spec.md @@ -0,0 +1,46 @@ +# Spec Writing Discipline + +You write structured implementation specs. Nothing is left ambiguous. + +## Iron laws +1. Success criteria must be measurable — "the system is fast" is banned; "p99 < 200ms under 100 RPS" is valid +2. Always include an explicit "Out of scope" section — if you don't draw the boundary, the developer will guess wrong +3. 
Every technical decision in the approach must have a rationale + +## Output contract +Return JSON result with: +- `status`: "pass" (spec written) or "error" (requirements too ambiguous to spec without more input) +- `phase`: "spec" +- `skill`: "spec" +- `file_path`: the output_path where the spec was written (absolute path) +- `runner_output`: "" +- `verified`: true if the file was written successfully +- `message`: "spec written: [feature]" + +## Spec structure +Write the spec as markdown to the output_path: + +```markdown +# [Feature] Spec + +## Problem statement +[What problem does this solve? For whom? Why now?] + +## Success criteria +- [ ] [Criterion 1 — measurable and verifiable] +- [ ] [Criterion 2 — measurable and verifiable] + +## Constraints +[Non-negotiable requirements the solution must satisfy] + +## Out of scope +[What we are explicitly NOT doing in this iteration] + +## Technical approach +[Architecture decisions, key components, rationale for each choice] + +## Risks +[What could go wrong, and how we'd mitigate it] +``` + +If the requirements are too vague to produce measurable success criteria, return status "error" with a message listing the specific questions that need answers. diff --git a/config/supervisor/trainer-reader.md b/config/supervisor/trainer-reader.md new file mode 100644 index 0000000..c1bab09 --- /dev/null +++ b/config/supervisor/trainer-reader.md @@ -0,0 +1,31 @@ +# Trainer Reader Discipline + +You scan session logs and identify candidate learning moments worth converting to training data. 
+ +## What to look for +- **SFT candidates**: the worker did exactly the right thing — a clean pattern worth reinforcing +- **DPO candidates**: the worker first produced a wrong or suboptimal response, then corrected — you have both rejected and chosen + +## Scoring (1–5) +- 5: novel pattern, clearly correct, generalises across projects +- 4: good pattern, correct, somewhat project-specific but still useful +- 3: correct but obvious — include only if especially clean +- 2 or below: skip — too ambiguous or too context-specific + +## Output contract +Return JSON result with: +- `status`: "pass" or "error" +- `phase`: "trainer" +- `skill`: "trainer" +- `file_path`: "" +- `runner_output`: JSON array of candidates (valid JSON, not markdown): + [{"type":"sft","moment":"","prompt":"","completion":"","score":4}, + {"type":"dpo","moment":"","prompt":"","chosen":"","rejected":"","score":3}] +- `verified`: true +- `message`: "N sft candidates, M dpo candidates found" + +## Rules +1. Read all session entries in the task prompt +2. Score each entry — only include entries scoring >= 3 +3. Prompt/completion fields must be phrased to generalise: no project-specific paths or names +4. If no candidates score >= 3, return an empty array `[]` — never force low-quality candidates diff --git a/config/supervisor/trainer-writer.md b/config/supervisor/trainer-writer.md new file mode 100644 index 0000000..1947671 --- /dev/null +++ b/config/supervisor/trainer-writer.md @@ -0,0 +1,35 @@ +# Trainer Writer Discipline + +You receive candidate learning moments from the reader and write clean SFT/DPO training pairs. 
+ +## Quality gate (apply before writing) +- SFT: prompt must be phrased so it could come from any project, not just this one +- DPO: chosen and rejected must be clearly distinguishable — skip if a reader can't tell which is better +- Never include project-specific paths, variable names, or identifiers in any pair + +## Output contract +Return JSON result with: +- `status`: "pass" (pairs written or skipped due to quality) or "error" (candidates JSON was malformed) +- `phase`: "trainer" +- `skill`: "trainer" +- `file_path`: path of the last file written (empty if nothing passed quality gate) +- `runner_output`: "N SFT pairs written to brain/training-data/sft/, M DPO pairs to brain/training-data/dpo/" or "0 pairs passed quality gate" +- `verified`: true if files were written; false if nothing passed +- `message`: "N sft + M dpo pairs for session [session_id]" or "no pairs passed quality gate" + +## File format +JSONL — one JSON object per line. + +SFT: `{"prompt": "...", "completion": "..."}` +DPO: `{"prompt": "...", "chosen": "...", "rejected": "..."}` + +Write SFT to: `[brain_dir]/training-data/sft/[session_id].jsonl` +Write DPO to: `[brain_dir]/training-data/dpo/[session_id].jsonl` + +Append to existing files if they exist (don't overwrite). + +## Rules +1. Parse the `reader_candidates` JSON from the task prompt +2. For each candidate: apply quality gate +3. Write passing SFT candidates to sft JSONL, DPO candidates to dpo JSONL +4. If nothing passes, return status "pass" with verified: false and message "no pairs passed quality gate" diff --git a/internal/exec/result.go b/internal/exec/result.go index 79be573..8b2f4dc 100644 --- a/internal/exec/result.go +++ b/internal/exec/result.go @@ -10,7 +10,7 @@ import ( // validates its own output before returning. 
type Result struct { Status string `json:"status"` // pass | fail | error - Phase string `json:"phase"` // red | green | refactor + Phase string `json:"phase"` // red | green | refactor | retrospective | review | debug | spec | trainer Skill string `json:"skill"` // tdd | review | ... FilePath string `json:"file_path"` // absolute path to generated file RunnerOutput string `json:"runner_output"` // raw stdout+stderr from test runner @@ -25,6 +25,10 @@ var validPhases = map[string]bool{ "green": true, "refactor": true, "retrospective": true, + "review": true, + "debug": true, + "spec": true, + "trainer": true, } func (r Result) Validate() error { @@ -33,7 +37,7 @@ func (r Result) Validate() error { errs = append(errs, "status must be pass|fail|error, got: "+r.Status) } if !validPhases[r.Phase] { - errs = append(errs, "phase must be red|green|refactor, got: "+r.Phase) + errs = append(errs, "phase must be one of red|green|refactor|retrospective|review|debug|spec|trainer, got: "+r.Phase) } if r.Skill == "" { errs = append(errs, "skill is required") @@ -50,7 +54,7 @@ const Schema = `{ "required": ["status","phase","skill","file_path","runner_output","verified","model_used","message"], "properties": { "status": {"type": "string", "enum": ["pass","fail","error"]}, - "phase": {"type": "string", "enum": ["red","green","refactor"]}, + "phase": {"type": "string"}, "skill": {"type": "string"}, "file_path": {"type": "string"}, "runner_output": {"type": "string"}, diff --git a/internal/exec/result_test.go b/internal/exec/result_test.go index 2802e5a..2d94fac 100644 --- a/internal/exec/result_test.go +++ b/internal/exec/result_test.go @@ -69,3 +69,11 @@ func TestResultValidation(t *testing.T) { }) } } + +func TestValidateAcceptsAllPhases(t *testing.T) { + phases := []string{"red", "green", "refactor", "retrospective", "review", "debug", "spec", "trainer"} + for _, phase := range phases { + r := exec.Result{Status: "pass", Phase: phase, Skill: "test", ModelUsed: "self", Message: 
"ok"} + assert.NoError(t, r.Validate(), "phase %q should be valid", phase) + } +} diff --git a/internal/session/history.go b/internal/session/history.go new file mode 100644 index 0000000..5d3f4f4 --- /dev/null +++ b/internal/session/history.go @@ -0,0 +1,38 @@ +// internal/session/history.go +package session + +import ( + "fmt" + "strings" +) + +// FormatHistory formats prior session entries as a structured block for +// injection into a worker task prompt. Entries matching excludePhase are +// omitted (pass the current phase to avoid circular injection). +func FormatHistory(entries []Entry, excludePhase string) string { + var filtered []Entry + for _, e := range entries { + if e.Phase != excludePhase { + filtered = append(filtered, e) + } + } + if len(filtered) == 0 { + return "" + } + + var b strings.Builder + b.WriteString("## Session history\n\n") + for _, e := range filtered { + b.WriteString(fmt.Sprintf("### Phase: %s\n", e.Phase)) + b.WriteString(fmt.Sprintf("- Skill: %s\n", e.Skill)) + b.WriteString(fmt.Sprintf("- Status: %s\n", e.FinalStatus)) + if e.FilePath != "" { + b.WriteString(fmt.Sprintf("- File: %s\n", e.FilePath)) + } + if e.Message != "" { + b.WriteString(fmt.Sprintf("- Summary: %s\n", e.Message)) + } + b.WriteString("\n") + } + return b.String() +} diff --git a/internal/session/history_test.go b/internal/session/history_test.go new file mode 100644 index 0000000..9117d23 --- /dev/null +++ b/internal/session/history_test.go @@ -0,0 +1,41 @@ +// internal/session/history_test.go +package session_test + +import ( + "testing" + "time" + + "github.com/mathiasbq/supervisor/internal/session" + "github.com/stretchr/testify/assert" +) + +func TestFormatHistoryEmpty(t *testing.T) { + result := session.FormatHistory(nil, "") + assert.Equal(t, "", result) +} + +func TestFormatHistoryFormatsEntries(t *testing.T) { + entries := []session.Entry{ + { + Skill: "tdd", Phase: "red", FinalStatus: "pass", + FilePath: "internal/foo/foo_test.go", + Message: "wrote 
failing test for Foo", + Timestamp: time.Now(), + }, + } + result := session.FormatHistory(entries, "") + assert.Contains(t, result, "## Session history") + assert.Contains(t, result, "Phase: red") + assert.Contains(t, result, "wrote failing test for Foo") + assert.Contains(t, result, "internal/foo/foo_test.go") +} + +func TestFormatHistoryExcludesCurrentPhase(t *testing.T) { + entries := []session.Entry{ + {Skill: "tdd", Phase: "red", Message: "red done", FinalStatus: "pass"}, + {Skill: "tdd", Phase: "green", Message: "green done", FinalStatus: "pass"}, + } + result := session.FormatHistory(entries, "green") + assert.Contains(t, result, "red done") + assert.NotContains(t, result, "green done") +} diff --git a/internal/skills/debug/handlers.go b/internal/skills/debug/handlers.go new file mode 100644 index 0000000..05b63c0 --- /dev/null +++ b/internal/skills/debug/handlers.go @@ -0,0 +1,80 @@ +// internal/skills/debug/handlers.go +package debug + +import ( + "context" + "encoding/json" + "fmt" + + iexec "github.com/mathiasbq/supervisor/internal/exec" + "github.com/mathiasbq/supervisor/internal/session" +) + +type debugArgs struct { + ProjectRoot string `json:"project_root"` + Error string `json:"error"` + Context string `json:"context"` + Model string `json:"model"` + SessionID string `json:"session_id"` +} + +// Handle dispatches the MCP tool call to the appropriate handler. 
+func (s *Skill) Handle(ctx context.Context, tool string, args json.RawMessage) (json.RawMessage, error) { + if tool != "debug" { + return nil, fmt.Errorf("unknown tool: %s", tool) + } + var a debugArgs + if err := json.Unmarshal(args, &a); err != nil { + return nil, fmt.Errorf("parse args: %w", err) + } + if a.ProjectRoot == "" { + return nil, fmt.Errorf("project_root is required") + } + if a.Error == "" { + return nil, fmt.Errorf("error is required") + } + + model := a.Model + if model == "" { + model = s.cfg.DefaultModel + } + + task := fmt.Sprintf( + "phase: debug\nproject_root: %s\nerror: %s\ncontext: %s\nmodel: %s", + a.ProjectRoot, a.Error, a.Context, model, + ) + task = s.prependHistory(a.SessionID, "debug", task) + + if s.cfg.ExecutorFn == nil { + return nil, fmt.Errorf("no executor configured") + } + result, err := s.cfg.ExecutorFn(ctx, iexec.Request{ + SkillPrompt: s.cfg.SkillPrompt, + TaskPrompt: task, + Model: model, + Tools: "Read,Bash", + }) + if err != nil { + return nil, err + } + b, err := json.Marshal(result) + if err != nil { + return nil, fmt.Errorf("marshal result: %w", err) + } + return b, nil +} + +func (s *Skill) prependHistory(sessionID, currentPhase, task string) string { + if sessionID == "" || s.cfg.SessionsDir == "" { + return task + } + entries, err := session.Read(s.cfg.SessionsDir, sessionID) + if err != nil || len(entries) == 0 { + return task + } + history := session.FormatHistory(entries, currentPhase) + if history == "" { + return task + } + return history + "\n---\n\n" + task +} diff --git a/internal/skills/debug/handlers_test.go b/internal/skills/debug/handlers_test.go new file mode 100644 index 0000000..ddf0c4b --- /dev/null +++ b/internal/skills/debug/handlers_test.go @@ -0,0 +1,61 @@ +// internal/skills/debug/handlers_test.go +package debug_test + +import ( + "context" + "encoding/json" + "testing" + + iexec "github.com/mathiasbq/supervisor/internal/exec" + "github.com/mathiasbq/supervisor/internal/skills/debug" + 
"github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestDebugToolRegistered(t *testing.T) { + sk := debug.New(debug.Config{SkillPrompt: "debug rules"}) + names := make([]string, 0) + for _, tool := range sk.Tools() { + names = append(names, tool.Name) + } + assert.Contains(t, names, "debug") +} + +func TestDebugRequiresProjectRoot(t *testing.T) { + sk := debug.New(debug.Config{SkillPrompt: "d"}) + _, err := sk.Handle(context.Background(), "debug", json.RawMessage(`{"error":"panic: nil pointer"}`)) + assert.ErrorContains(t, err, "project_root") +} + +func TestDebugRequiresError(t *testing.T) { + sk := debug.New(debug.Config{SkillPrompt: "d"}) + _, err := sk.Handle(context.Background(), "debug", json.RawMessage(`{"project_root":"/tmp"}`)) + assert.ErrorContains(t, err, "error") +} + +func TestDebugCallsExecutor(t *testing.T) { + called := false + var capturedTask string + fakeFn := func(_ context.Context, req iexec.Request) (iexec.Result, error) { + called = true + capturedTask = req.TaskPrompt + return iexec.Result{ + Status: "pass", Phase: "debug", Skill: "debug", + RunnerOutput: "HYPOTHESIS 1 (likelihood: high): nil map access\nVERIFY: go test ./... 
→ expected: panic line reference", + Verified: false, ModelUsed: "self", Message: "3 hypotheses for: panic nil pointer at foo.go:42", + }, nil + } + + sk := debug.New(debug.Config{SkillPrompt: "debug rules", ExecutorFn: fakeFn, SessionsDir: t.TempDir()}) + out, err := sk.Handle(context.Background(), "debug", json.RawMessage( + `{"project_root":"/tmp/proj","error":"panic: nil pointer dereference at foo.go:42","context":"occurs on startup"}`, + )) + require.NoError(t, err) + assert.True(t, called) + assert.Contains(t, capturedTask, "panic: nil pointer dereference") + assert.Contains(t, capturedTask, "occurs on startup") + + var result iexec.Result + require.NoError(t, json.Unmarshal(out, &result)) + assert.Equal(t, "debug", result.Phase) +} diff --git a/internal/skills/debug/skill.go b/internal/skills/debug/skill.go new file mode 100644 index 0000000..8dcc083 --- /dev/null +++ b/internal/skills/debug/skill.go @@ -0,0 +1,55 @@ +// internal/skills/debug/skill.go +package debug + +import ( + "context" + "encoding/json" + + iexec "github.com/mathiasbq/supervisor/internal/exec" + "github.com/mathiasbq/supervisor/internal/registry" +) + +// ExecutorFn is the function signature for running a worker subprocess. +type ExecutorFn func(ctx context.Context, req iexec.Request) (iexec.Result, error) + +// Config holds dependencies for the debug skill. +type Config struct { + SkillPrompt string + DefaultModel string + ExecutorFn ExecutorFn + SessionsDir string +} + +// Skill implements the debug MCP tool. +type Skill struct{ cfg Config } + +// New creates a new debug Skill. +func New(cfg Config) *Skill { return &Skill{cfg: cfg} } + +// Name returns the skill identifier. +func (s *Skill) Name() string { return "debug" } + +// Tools returns the MCP tool definitions for this skill. 
+func (s *Skill) Tools() []registry.ToolDef { + schema := func(required []string, props map[string]any) json.RawMessage { + b, _ := json.Marshal(map[string]any{"type": "object", "required": required, "properties": props}) + return b + } + str := map[string]any{"type": "string"} + return []registry.ToolDef{ + { + Name: "debug", + Description: "Analyse an error and return 3-5 hypotheses ordered by likelihood, each with a concrete verification step.", + InputSchema: schema( + []string{"project_root", "error"}, + map[string]any{ + "project_root": str, + "error": str, + "context": str, + "model": str, + "session_id": str, + }, + ), + }, + } +} diff --git a/internal/skills/review/handlers.go b/internal/skills/review/handlers.go new file mode 100644 index 0000000..b5da04f --- /dev/null +++ b/internal/skills/review/handlers.go @@ -0,0 +1,81 @@ +// internal/skills/review/handlers.go +package review + +import ( + "context" + "encoding/json" + "fmt" + "strings" + + iexec "github.com/mathiasbq/supervisor/internal/exec" + "github.com/mathiasbq/supervisor/internal/session" +) + +type reviewArgs struct { + ProjectRoot string `json:"project_root"` + Files []string `json:"files"` + Context string `json:"context"` + Model string `json:"model"` + SessionID string `json:"session_id"` +} + +// Handle dispatches the MCP tool call to the appropriate handler. 
+func (s *Skill) Handle(ctx context.Context, tool string, args json.RawMessage) (json.RawMessage, error) { + if tool != "review" { + return nil, fmt.Errorf("unknown tool: %s", tool) + } + var a reviewArgs + if err := json.Unmarshal(args, &a); err != nil { + return nil, fmt.Errorf("parse args: %w", err) + } + if a.ProjectRoot == "" { + return nil, fmt.Errorf("project_root is required") + } + if len(a.Files) == 0 { + return nil, fmt.Errorf("files is required") + } + + model := a.Model + if model == "" { + model = s.cfg.DefaultModel + } + + task := fmt.Sprintf( + "phase: review\nproject_root: %s\nfiles: %s\ncontext: %s\nmodel: %s", + a.ProjectRoot, strings.Join(a.Files, ", "), a.Context, model, + ) + task = s.prependHistory(a.SessionID, "review", task) + + if s.cfg.ExecutorFn == nil { + return nil, fmt.Errorf("no executor configured") + } + result, err := s.cfg.ExecutorFn(ctx, iexec.Request{ + SkillPrompt: s.cfg.SkillPrompt, + TaskPrompt: task, + Model: model, + Tools: "Read,Bash", + }) + if err != nil { + return nil, err + } + b, err := json.Marshal(result) + if err != nil { + return nil, fmt.Errorf("marshal result: %w", err) + } + return b, nil +} + +func (s *Skill) prependHistory(sessionID, currentPhase, task string) string { + if sessionID == "" || s.cfg.SessionsDir == "" { + return task + } + entries, err := session.Read(s.cfg.SessionsDir, sessionID) + if err != nil || len(entries) == 0 { + return task + } + history := session.FormatHistory(entries, currentPhase) + if history == "" { + return task + } + return history + "\n---\n\n" + task +} diff --git a/internal/skills/review/handlers_test.go b/internal/skills/review/handlers_test.go new file mode 100644 index 0000000..2d32397 --- /dev/null +++ b/internal/skills/review/handlers_test.go @@ -0,0 +1,61 @@ +// internal/skills/review/handlers_test.go +package review_test + +import ( + "context" + "encoding/json" + "testing" + + iexec "github.com/mathiasbq/supervisor/internal/exec" + 
"github.com/mathiasbq/supervisor/internal/skills/review" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestReviewToolRegistered(t *testing.T) { + sk := review.New(review.Config{SkillPrompt: "review rules"}) + names := make([]string, 0) + for _, tool := range sk.Tools() { + names = append(names, tool.Name) + } + assert.Contains(t, names, "review") +} + +func TestReviewRequiresProjectRoot(t *testing.T) { + sk := review.New(review.Config{SkillPrompt: "r"}) + _, err := sk.Handle(context.Background(), "review", json.RawMessage(`{"files":["main.go"]}`)) + assert.ErrorContains(t, err, "project_root") +} + +func TestReviewRequiresFiles(t *testing.T) { + sk := review.New(review.Config{SkillPrompt: "r"}) + _, err := sk.Handle(context.Background(), "review", json.RawMessage(`{"project_root":"/tmp"}`)) + assert.ErrorContains(t, err, "files") +} + +func TestReviewCallsExecutor(t *testing.T) { + called := false + var capturedTask string + fakeFn := func(_ context.Context, req iexec.Request) (iexec.Result, error) { + called = true + capturedTask = req.TaskPrompt + return iexec.Result{ + Status: "pass", Phase: "review", Skill: "review", + Verified: true, ModelUsed: "self", Message: "2 warnings found", + }, nil + } + + sk := review.New(review.Config{SkillPrompt: "review rules", ExecutorFn: fakeFn, SessionsDir: t.TempDir()}) + out, err := sk.Handle(context.Background(), "review", json.RawMessage( + `{"project_root":"/tmp/proj","files":["internal/foo/foo.go"],"context":"PR: add Foo helper"}`, + )) + require.NoError(t, err) + assert.True(t, called) + assert.Contains(t, capturedTask, "internal/foo/foo.go") + assert.Contains(t, capturedTask, "PR: add Foo helper") + + var result iexec.Result + require.NoError(t, json.Unmarshal(out, &result)) + assert.Equal(t, "pass", result.Status) + assert.Equal(t, "review", result.Phase) +} diff --git a/internal/skills/review/skill.go b/internal/skills/review/skill.go new file mode 100644 index 
0000000..25a6936 --- /dev/null +++ b/internal/skills/review/skill.go @@ -0,0 +1,55 @@ +// internal/skills/review/skill.go +package review + +import ( + "context" + "encoding/json" + + iexec "github.com/mathiasbq/supervisor/internal/exec" + "github.com/mathiasbq/supervisor/internal/registry" +) + +// ExecutorFn is the function signature for running a worker subprocess. +type ExecutorFn func(ctx context.Context, req iexec.Request) (iexec.Result, error) + +// Config holds dependencies for the review skill. +type Config struct { + SkillPrompt string + DefaultModel string + ExecutorFn ExecutorFn + SessionsDir string +} + +// Skill implements the review MCP tool. +type Skill struct{ cfg Config } + +// New creates a new review Skill. +func New(cfg Config) *Skill { return &Skill{cfg: cfg} } + +// Name returns the skill identifier. +func (s *Skill) Name() string { return "review" } + +// Tools returns the MCP tool definitions for this skill. +func (s *Skill) Tools() []registry.ToolDef { + schema := func(required []string, props map[string]any) json.RawMessage { + b, _ := json.Marshal(map[string]any{"type": "object", "required": required, "properties": props}) + return b + } + str := map[string]any{"type": "string"} + return []registry.ToolDef{ + { + Name: "review", + Description: "Perform a structured code review of the specified files. 
Returns findings with severity levels.", + InputSchema: schema( + []string{"project_root", "files"}, + map[string]any{ + "project_root": str, + "files": map[string]any{"type": "array", "items": map[string]any{"type": "string"}}, + "context": str, + "model": str, + "session_id": str, + }, + ), + }, + } +} diff --git a/internal/skills/spec/handlers.go b/internal/skills/spec/handlers.go new file mode 100644 index 0000000..2c3dc2d --- /dev/null +++ b/internal/skills/spec/handlers.go @@ -0,0 +1,85 @@ +// internal/skills/spec/handlers.go +package spec + +import ( + "context" + "encoding/json" + "fmt" + + iexec "github.com/mathiasbq/supervisor/internal/exec" + "github.com/mathiasbq/supervisor/internal/session" +) + +type specArgs struct { + ProjectRoot string `json:"project_root"` + Requirements string `json:"requirements"` + OutputPath string `json:"output_path"` + Context string `json:"context"` + Model string `json:"model"` + SessionID string `json:"session_id"` +} + +// Handle dispatches the MCP tool call to the appropriate handler. 
+func (s *Skill) Handle(ctx context.Context, tool string, args json.RawMessage) (json.RawMessage, error) { + if tool != "spec" { + return nil, fmt.Errorf("unknown tool: %s", tool) + } + var a specArgs + if err := json.Unmarshal(args, &a); err != nil { + return nil, fmt.Errorf("parse args: %w", err) + } + if a.ProjectRoot == "" { + return nil, fmt.Errorf("project_root is required") + } + if a.Requirements == "" { + return nil, fmt.Errorf("requirements is required") + } + outputPath := a.OutputPath + if outputPath == "" { + outputPath = "docs/spec.md" + } + + model := a.Model + if model == "" { + model = s.cfg.DefaultModel + } + + task := fmt.Sprintf( + "phase: spec\nproject_root: %s\nrequirements: %s\noutput_path: %s\ncontext: %s\nmodel: %s", + a.ProjectRoot, a.Requirements, outputPath, a.Context, model, + ) + task = s.prependHistory(a.SessionID, "spec", task) + + if s.cfg.ExecutorFn == nil { + return nil, fmt.Errorf("no executor configured") + } + result, err := s.cfg.ExecutorFn(ctx, iexec.Request{ + SkillPrompt: s.cfg.SkillPrompt, + TaskPrompt: task, + Model: model, + Tools: "Read,Write", + }) + if err != nil { + return nil, err + } + b, err := json.Marshal(result) + if err != nil { + return nil, fmt.Errorf("marshal result: %w", err) + } + return b, nil +} + +func (s *Skill) prependHistory(sessionID, currentPhase, task string) string { + if sessionID == "" || s.cfg.SessionsDir == "" { + return task + } + entries, err := session.Read(s.cfg.SessionsDir, sessionID) + if err != nil || len(entries) == 0 { + return task + } + history := session.FormatHistory(entries, currentPhase) + if history == "" { + return task + } + return history + "\n---\n\n" + task +} diff --git a/internal/skills/spec/handlers_test.go b/internal/skills/spec/handlers_test.go new file mode 100644 index 0000000..6ccf6c4 --- /dev/null +++ b/internal/skills/spec/handlers_test.go @@ -0,0 +1,61 @@ +// internal/skills/spec/handlers_test.go +package spec_test + +import ( + "context" + "encoding/json" + 
"testing" + + iexec "github.com/mathiasbq/supervisor/internal/exec" + "github.com/mathiasbq/supervisor/internal/skills/spec" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestSpecToolRegistered(t *testing.T) { + sk := spec.New(spec.Config{SkillPrompt: "spec rules"}) + names := make([]string, 0) + for _, tool := range sk.Tools() { + names = append(names, tool.Name) + } + assert.Contains(t, names, "spec") +} + +func TestSpecRequiresProjectRoot(t *testing.T) { + sk := spec.New(spec.Config{SkillPrompt: "s"}) + _, err := sk.Handle(context.Background(), "spec", json.RawMessage(`{"requirements":"add login"}`)) + assert.ErrorContains(t, err, "project_root") +} + +func TestSpecRequiresRequirements(t *testing.T) { + sk := spec.New(spec.Config{SkillPrompt: "s"}) + _, err := sk.Handle(context.Background(), "spec", json.RawMessage(`{"project_root":"/tmp"}`)) + assert.ErrorContains(t, err, "requirements") +} + +func TestSpecCallsExecutor(t *testing.T) { + called := false + var capturedTask string + fakeFn := func(_ context.Context, req iexec.Request) (iexec.Result, error) { + called = true + capturedTask = req.TaskPrompt + return iexec.Result{ + Status: "pass", Phase: "spec", Skill: "spec", + FilePath: "/tmp/proj/docs/login-spec.md", + Verified: true, ModelUsed: "self", Message: "spec written: login feature", + }, nil + } + + sk := spec.New(spec.Config{SkillPrompt: "spec rules", ExecutorFn: fakeFn, SessionsDir: t.TempDir()}) + out, err := sk.Handle(context.Background(), "spec", json.RawMessage( + `{"project_root":"/tmp/proj","requirements":"add OAuth2 login","output_path":"docs/login-spec.md"}`, + )) + require.NoError(t, err) + assert.True(t, called) + assert.Contains(t, capturedTask, "OAuth2 login") + assert.Contains(t, capturedTask, "docs/login-spec.md") + + var result iexec.Result + require.NoError(t, json.Unmarshal(out, &result)) + assert.Equal(t, "spec", result.Phase) +} diff --git a/internal/skills/spec/skill.go 
b/internal/skills/spec/skill.go new file mode 100644 index 0000000..fa52d20 --- /dev/null +++ b/internal/skills/spec/skill.go @@ -0,0 +1,56 @@ +// internal/skills/spec/skill.go +package spec + +import ( + "context" + "encoding/json" + + iexec "github.com/mathiasbq/supervisor/internal/exec" + "github.com/mathiasbq/supervisor/internal/registry" +) + +// ExecutorFn is the function signature for running a worker subprocess. +type ExecutorFn func(ctx context.Context, req iexec.Request) (iexec.Result, error) + +// Config holds dependencies for the spec skill. +type Config struct { + SkillPrompt string + DefaultModel string + ExecutorFn ExecutorFn + SessionsDir string +} + +// Skill implements the spec MCP tool. +type Skill struct{ cfg Config } + +// New creates a new spec Skill. +func New(cfg Config) *Skill { return &Skill{cfg: cfg} } + +// Name returns the skill identifier. +func (s *Skill) Name() string { return "spec" } + +// Tools returns the MCP tool definitions for this skill. +func (s *Skill) Tools() []registry.ToolDef { + schema := func(required []string, props map[string]any) json.RawMessage { + b, _ := json.Marshal(map[string]any{"type": "object", "required": required, "properties": props}) + return b + } + str := map[string]any{"type": "string"} + return []registry.ToolDef{ + { + Name: "spec", + Description: "Generate a structured implementation spec from requirements. 
Writes the spec to output_path in the project.", + InputSchema: schema( + []string{"project_root", "requirements"}, + map[string]any{ + "project_root": str, + "requirements": str, + "output_path": str, + "context": str, + "model": str, + "session_id": str, + }, + ), + }, + } +} diff --git a/internal/skills/tdd/handlers.go b/internal/skills/tdd/handlers.go index 98d904a..89ce09d 100644 --- a/internal/skills/tdd/handlers.go +++ b/internal/skills/tdd/handlers.go @@ -6,6 +6,7 @@ import ( "fmt" iexec "github.com/mathiasbq/supervisor/internal/exec" + "github.com/mathiasbq/supervisor/internal/session" ) func (s *Skill) Handle(ctx context.Context, tool string, args json.RawMessage) (json.RawMessage, error) { @@ -51,6 +52,7 @@ type greenArgs struct { TestPath string `json:"test_path"` Model string `json:"model"` TestCmd string `json:"test_cmd"` + SessionID string `json:"session_id"` } func (s *Skill) handleGreen(ctx context.Context, raw json.RawMessage) (json.RawMessage, error) { @@ -68,6 +70,7 @@ func (s *Skill) handleGreen(ctx context.Context, raw json.RawMessage) (json.RawM "phase: green\nproject_root: %s\ntest_path: %s\nmodel: %s\ntest_cmd: %s", args.ProjectRoot, args.TestPath, s.resolveModel(args.Model), args.TestCmd, ) + task = s.prependHistory(args.SessionID, "green", task) return s.execute(ctx, task) } @@ -77,6 +80,7 @@ type refactorArgs struct { ImplPath string `json:"impl_path"` Model string `json:"model"` TestCmd string `json:"test_cmd"` + SessionID string `json:"session_id"` } func (s *Skill) handleRefactor(ctx context.Context, raw json.RawMessage) (json.RawMessage, error) { @@ -97,9 +101,25 @@ func (s *Skill) handleRefactor(ctx context.Context, raw json.RawMessage) (json.R "phase: refactor\nproject_root: %s\ntest_path: %s\nimpl_path: %s\nmodel: %s\ntest_cmd: %s", args.ProjectRoot, args.TestPath, args.ImplPath, s.resolveModel(args.Model), args.TestCmd, ) + task = s.prependHistory(args.SessionID, "refactor", task) return s.execute(ctx, task) } +func (s *Skill) 
prependHistory(sessionID, currentPhase, task string) string { + if sessionID == "" || s.cfg.SessionsDir == "" { + return task + } + entries, err := session.Read(s.cfg.SessionsDir, sessionID) + if err != nil || len(entries) == 0 { + return task + } + history := session.FormatHistory(entries, currentPhase) + if history == "" { + return task + } + return history + "\n---\n\n" + task +} + func (s *Skill) resolveModel(override string) string { if override != "" { return override diff --git a/internal/skills/tdd/handlers_test.go b/internal/skills/tdd/handlers_test.go index d0490b6..e299cbb 100644 --- a/internal/skills/tdd/handlers_test.go +++ b/internal/skills/tdd/handlers_test.go @@ -5,6 +5,8 @@ import ( "encoding/json" "testing" + iexec "github.com/mathiasbq/supervisor/internal/exec" + "github.com/mathiasbq/supervisor/internal/session" "github.com/mathiasbq/supervisor/internal/skills/tdd" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -41,5 +43,43 @@ func TestTDDRedRequiresSpec(t *testing.T) { assert.ErrorContains(t, err, "spec") } +func TestTDDGreenInjectsSessionHistory(t *testing.T) { + sessDir := t.TempDir() + require.NoError(t, session.Append(sessDir, "sess-1", session.Entry{ + SessionID: "sess-1", Skill: "tdd", Phase: "red", FinalStatus: "pass", + FilePath: "internal/foo/foo_test.go", + Message: "wrote failing test for Foo", + })) + + var capturedPrompt string + fakeFn := func(_ context.Context, req iexec.Request) (iexec.Result, error) { + capturedPrompt = req.TaskPrompt + return iexec.Result{Status: "pass", Phase: "green", Skill: "tdd", Verified: true, ModelUsed: "self", Message: "ok"}, nil + } + + sk := tdd.New(tdd.Config{SkillPrompt: "tdd", ExecutorFn: fakeFn, SessionsDir: sessDir}) + _, err := sk.Handle(context.Background(), "tdd_green", json.RawMessage( + `{"project_root":"/tmp","test_path":"internal/foo/foo_test.go","test_cmd":"go test ./...","session_id":"sess-1"}`, + )) + require.NoError(t, err) + assert.Contains(t, 
capturedPrompt, "## Session history") + assert.Contains(t, capturedPrompt, "wrote failing test for Foo") +} + +func TestTDDGreenNoHistoryWhenSessionIDEmpty(t *testing.T) { + var capturedPrompt string + fakeFn := func(_ context.Context, req iexec.Request) (iexec.Result, error) { + capturedPrompt = req.TaskPrompt + return iexec.Result{Status: "pass", Phase: "green", Skill: "tdd", Verified: true, ModelUsed: "self", Message: "ok"}, nil + } + + sk := tdd.New(tdd.Config{SkillPrompt: "tdd", ExecutorFn: fakeFn, SessionsDir: t.TempDir()}) + _, err := sk.Handle(context.Background(), "tdd_green", json.RawMessage( + `{"project_root":"/tmp","test_path":"internal/foo/foo_test.go"}`, + )) + require.NoError(t, err) + assert.NotContains(t, capturedPrompt, "## Session history") +} + // Ensure require is used (avoids import error). var _ = require.New diff --git a/internal/skills/tdd/skill.go b/internal/skills/tdd/skill.go index c3fdbc6..5d74417 100644 --- a/internal/skills/tdd/skill.go +++ b/internal/skills/tdd/skill.go @@ -16,6 +16,7 @@ type Config struct { SkillPrompt string ExecutorFn ExecutorFn // nil = no executor (tests that don't reach execute()) DefaultModel string + SessionsDir string // optional: path to brain/sessions/ for history injection } type Skill struct { @@ -63,6 +64,7 @@ func (s *Skill) Tools() []registry.ToolDef { "test_path": strProp, "model": strProp, "test_cmd": strProp, + "session_id": strProp, }, ), }, @@ -77,6 +79,7 @@ func (s *Skill) Tools() []registry.ToolDef { "impl_path": strProp, "model": strProp, "test_cmd": strProp, + "session_id": strProp, }, ), }, diff --git a/internal/skills/trainer/handlers.go b/internal/skills/trainer/handlers.go new file mode 100644 index 0000000..7c21e8d --- /dev/null +++ b/internal/skills/trainer/handlers.go @@ -0,0 +1,80 @@ +// internal/skills/trainer/handlers.go +package trainer + +import ( + "context" + "encoding/json" + "fmt" + + iexec "github.com/mathiasbq/supervisor/internal/exec" + 
"github.com/mathiasbq/supervisor/internal/session" +) + +type trainArgs struct { + SessionID string `json:"session_id"` + Model string `json:"model"` +} + +// Handle dispatches the MCP tool call to the trainer handler. +func (s *Skill) Handle(ctx context.Context, tool string, args json.RawMessage) (json.RawMessage, error) { + if tool != "trainer" { + return nil, fmt.Errorf("unknown tool: %s", tool) + } + var a trainArgs + if err := json.Unmarshal(args, &a); err != nil { + return nil, fmt.Errorf("parse args: %w", err) + } + if a.SessionID == "" { + return nil, fmt.Errorf("session_id is required") + } + if s.cfg.ExecutorFn == nil { + return nil, fmt.Errorf("no executor configured") + } + + model := a.Model + if model == "" { + model = s.cfg.DefaultModel + } + + entries, err := session.Read(s.cfg.SessionsDir, a.SessionID) + if err != nil { + return nil, fmt.Errorf("read session log: %w", err) + } + + // ── Step 1: Reader agent ───────────────────────────────────────────────── + history := session.FormatHistory(entries, "") + readerTask := fmt.Sprintf( + "role: reader\nsession_id: %s\nbrain_dir: %s\n\n%s", + a.SessionID, s.cfg.BrainDir, history, + ) + readerResult, err := s.cfg.ExecutorFn(ctx, iexec.Request{ + SkillPrompt: s.cfg.ReaderPrompt, + TaskPrompt: readerTask, + Model: model, + Tools: "Read", + }) + if err != nil { + return nil, fmt.Errorf("reader agent: %w", err) + } + + // ── Step 2: Writer agent (receives reader candidates) ──────────────────── + writerTask := fmt.Sprintf( + "role: writer\nsession_id: %s\nbrain_dir: %s\n\nreader_summary: %s\nreader_candidates:\n%s", + a.SessionID, s.cfg.BrainDir, readerResult.Message, readerResult.RunnerOutput, + ) + writerResult, err := s.cfg.ExecutorFn(ctx, iexec.Request{ + SkillPrompt: s.cfg.WriterPrompt, + TaskPrompt: writerTask, + Model: model, + Tools: "Read,Write", + }) + if err != nil { + return nil, fmt.Errorf("writer agent: %w", err) + } + + b, err := json.Marshal(writerResult) + if err != nil { + return nil, 
fmt.Errorf("marshal result: %w", err) + } + return b, nil +} diff --git a/internal/skills/trainer/handlers_test.go b/internal/skills/trainer/handlers_test.go new file mode 100644 index 0000000..e20704b --- /dev/null +++ b/internal/skills/trainer/handlers_test.go @@ -0,0 +1,82 @@ +// internal/skills/trainer/handlers_test.go +package trainer_test + +import ( + "context" + "encoding/json" + "testing" + + iexec "github.com/mathiasbq/supervisor/internal/exec" + "github.com/mathiasbq/supervisor/internal/session" + "github.com/mathiasbq/supervisor/internal/skills/trainer" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestTrainerToolRegistered(t *testing.T) { + sk := trainer.New(trainer.Config{ReaderPrompt: "r", WriterPrompt: "w"}) + names := make([]string, 0) + for _, tool := range sk.Tools() { + names = append(names, tool.Name) + } + assert.Contains(t, names, "trainer") +} + +func TestTrainerRequiresSessionID(t *testing.T) { + sk := trainer.New(trainer.Config{ReaderPrompt: "r", WriterPrompt: "w"}) + _, err := sk.Handle(context.Background(), "trainer", json.RawMessage(`{}`)) + assert.ErrorContains(t, err, "session_id") +} + +func TestTrainerCallsReaderThenWriter(t *testing.T) { + sessDir := t.TempDir() + require.NoError(t, session.Append(sessDir, "sess-1", session.Entry{ + SessionID: "sess-1", Skill: "tdd", Phase: "red", FinalStatus: "pass", + Message: "wrote failing test", FilePath: "internal/foo/foo_test.go", + })) + + callCount := 0 + var readerTask, writerTask string + + fakeFn := func(_ context.Context, req iexec.Request) (iexec.Result, error) { + callCount++ + if callCount == 1 { + // reader call + readerTask = req.TaskPrompt + return iexec.Result{ + Status: "pass", Phase: "trainer", Skill: "trainer", + RunnerOutput: `[{"type":"sft","moment":"first-pass clean TDD","score":4}]`, + Verified: true, ModelUsed: "self", Message: "1 sft candidate found", + }, nil + } + // writer call + writerTask = req.TaskPrompt + return 
iexec.Result{ + Status: "pass", Phase: "trainer", Skill: "trainer", + FilePath: sessDir + "/training-data/sft/sess-1.jsonl", + Verified: true, ModelUsed: "self", Message: "1 sft pair written", + }, nil + } + + sk := trainer.New(trainer.Config{ + ReaderPrompt: "reader rules", + WriterPrompt: "writer rules", + ExecutorFn: fakeFn, + SessionsDir: sessDir, + BrainDir: t.TempDir(), + }) + out, err := sk.Handle(context.Background(), "trainer", json.RawMessage(`{"session_id":"sess-1"}`)) + require.NoError(t, err) + + assert.Equal(t, 2, callCount, "executor must be called exactly twice: reader then writer") + assert.Contains(t, readerTask, "role: reader") + assert.Contains(t, readerTask, "sess-1") + assert.Contains(t, readerTask, "wrote failing test") // session history in reader prompt + assert.Contains(t, writerTask, "role: writer") + assert.Contains(t, writerTask, "sft candidate") // reader output passed to writer + + var result iexec.Result + require.NoError(t, json.Unmarshal(out, &result)) + assert.Equal(t, "trainer", result.Phase) + assert.Equal(t, "pass", result.Status) +} diff --git a/internal/skills/trainer/skill.go b/internal/skills/trainer/skill.go new file mode 100644 index 0000000..d5ecf86 --- /dev/null +++ b/internal/skills/trainer/skill.go @@ -0,0 +1,53 @@ +// internal/skills/trainer/skill.go +package trainer + +import ( + "context" + "encoding/json" + + iexec "github.com/mathiasbq/supervisor/internal/exec" + "github.com/mathiasbq/supervisor/internal/registry" +) + +// ExecutorFn is the function signature for running a worker subprocess. +type ExecutorFn func(ctx context.Context, req iexec.Request) (iexec.Result, error) + +// Config holds dependencies for the trainer skill. +type Config struct { + ReaderPrompt string + WriterPrompt string + DefaultModel string + ExecutorFn ExecutorFn + SessionsDir string + BrainDir string // root of brain/ directory; writer writes to BrainDir/training-data/ +} + +// Skill implements the trainer MCP tool. 
+type Skill struct{ cfg Config } + +// New creates a new trainer Skill. +func New(cfg Config) *Skill { return &Skill{cfg: cfg} } + +// Name returns the skill identifier. +func (s *Skill) Name() string { return "trainer" } + +// Tools returns the MCP tool definitions for this skill. +func (s *Skill) Tools() []registry.ToolDef { + schema := func(required []string, props map[string]any) json.RawMessage { + b, _ := json.Marshal(map[string]any{"type": "object", "required": required, "properties": props}) + return b + } + return []registry.ToolDef{ + { + Name: "trainer", + Description: "Extract SFT and DPO training pairs from a session log. Runs a reader→writer chain: reader identifies learning moments, writer formats and writes pairs to brain/training-data/.", + InputSchema: schema( + []string{"session_id"}, + map[string]any{ + "session_id": map[string]any{"type": "string"}, + "model": map[string]any{"type": "string"}, + }, + ), + }, + } +}