From 8cff57009a7ccaeca75d9e30f6717d61ee1ea632 Mon Sep 17 00:00:00 2001 From: Mathias Bergqvist Date: Sun, 19 Apr 2026 11:29:58 +0200 Subject: [PATCH] feat(debug): add debug MCP skill with hypothesis generation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements the debug skill following the same pattern as review. The skill accepts project_root + error (+ optional context/model/session_id), prepends session history, and calls the executor to produce 3-5 ordered hypotheses — diagnosis only, no fixes. Co-Authored-By: Claude Sonnet 4.6 --- cmd/supervisor/main.go | 13 +++++ config/supervisor/debug.md | 31 ++++++++++ internal/skills/debug/handlers.go | 80 ++++++++++++++++++++++++++ internal/skills/debug/handlers_test.go | 61 ++++++++++++++++++++ internal/skills/debug/skill.go | 55 ++++++++++++++++++ 5 files changed, 240 insertions(+) create mode 100644 config/supervisor/debug.md create mode 100644 internal/skills/debug/handlers.go create mode 100644 internal/skills/debug/handlers_test.go create mode 100644 internal/skills/debug/skill.go diff --git a/cmd/supervisor/main.go b/cmd/supervisor/main.go index 8749e06..9a6b325 100644 --- a/cmd/supervisor/main.go +++ b/cmd/supervisor/main.go @@ -13,6 +13,7 @@ import ( "github.com/mathiasbq/supervisor/internal/skills/brain" "github.com/mathiasbq/supervisor/internal/skills/org" "github.com/mathiasbq/supervisor/internal/skills/retrospective" + skilldebug "github.com/mathiasbq/supervisor/internal/skills/debug" "github.com/mathiasbq/supervisor/internal/skills/review" "github.com/mathiasbq/supervisor/internal/skills/sessionlog" "github.com/mathiasbq/supervisor/internal/skills/tdd" @@ -58,6 +59,12 @@ func main() { os.Exit(1) } + debugPrompt, err := os.ReadFile(cfg.ConfigDir + "/debug.md") + if err != nil { + logger.Error("read debug.md", "path", cfg.ConfigDir+"/debug.md", "err", err) + os.Exit(1) + } + executor := iexec.New(iexec.Config{ SystemPrompt: string(systemPrompt), 
LiteLLMBaseURL: cfg.LiteLLMBaseURL, @@ -97,6 +104,12 @@ func main() { ExecutorFn: executor.Run, SessionsDir: cfg.SessionsDir, })) + reg.Register(skilldebug.New(skilldebug.Config{ + SkillPrompt: string(debugPrompt), + DefaultModel: models.Resolve("debug", ""), + ExecutorFn: executor.Run, + SessionsDir: cfg.SessionsDir, + })) srv := mcp.NewServer(reg) mux := http.NewServeMux() diff --git a/config/supervisor/debug.md b/config/supervisor/debug.md new file mode 100644 index 0000000..c800dc5 --- /dev/null +++ b/config/supervisor/debug.md @@ -0,0 +1,31 @@ +# Debug Discipline + +You are a systematic debugger. Form hypotheses before suggesting fixes. + +## Iron laws +1. Never suggest "try X and see what happens" — every hypothesis must have a specific expected outcome if correct +2. Generate exactly 3-5 hypotheses, ordered by likelihood (most likely first) +3. Never fix the bug — diagnose only; the caller decides what to do with the hypotheses + +## Output contract +Return JSON result with: +- `status`: "pass" (hypotheses generated) or "error" (error too ambiguous to analyse) +- `phase`: "debug" +- `skill`: "debug" +- `file_path`: the most relevant file to the error (read it) +- `runner_output`: your hypotheses, formatted as: + ``` + HYPOTHESIS 1 (likelihood: high): [mechanism] + VERIFY: [command] → expected if correct: [expected output] + + HYPOTHESIS 2 (likelihood: medium): [mechanism] + VERIFY: [command] → expected if correct: [expected output] + ``` +- `verified`: false — verification is the caller's job +- `message`: "N hypotheses for: [one-line error summary]" + +## Rules +1. Read the error and any context files provided before forming hypotheses +2. Identify the failure mode first — what actually went wrong, not just what the error says +3. For each hypothesis: name the mechanism, explain why it would produce this exact error, give a concrete verification command with expected output +4. 
If the error is clearly a typo or trivial mistake, still form 3 hypotheses — surface the most likely cause as #1 diff --git a/internal/skills/debug/handlers.go b/internal/skills/debug/handlers.go new file mode 100644 index 0000000..05b63c0 --- /dev/null +++ b/internal/skills/debug/handlers.go @@ -0,0 +1,80 @@ +// internal/skills/debug/handlers.go +package debug + +import ( + "context" + "encoding/json" + "fmt" + + iexec "github.com/mathiasbq/supervisor/internal/exec" + "github.com/mathiasbq/supervisor/internal/session" +) + +type debugArgs struct { + ProjectRoot string `json:"project_root"` + Error string `json:"error"` + Context string `json:"context"` + Model string `json:"model"` + SessionID string `json:"session_id"` +} + +// Handle dispatches the MCP tool call to the appropriate handler. +func (s *Skill) Handle(ctx context.Context, tool string, args json.RawMessage) (json.RawMessage, error) { + if tool != "debug" { + return nil, fmt.Errorf("unknown tool: %s", tool) + } + var a debugArgs + if err := json.Unmarshal(args, &a); err != nil { + return nil, fmt.Errorf("parse args: %w", err) + } + if a.ProjectRoot == "" { + return nil, fmt.Errorf("project_root is required") + } + if a.Error == "" { + return nil, fmt.Errorf("error is required") + } + + model := a.Model + if model == "" { + model = s.cfg.DefaultModel + } + + task := fmt.Sprintf( + "phase: debug\nproject_root: %s\nerror: %s\ncontext: %s\nmodel: %s", + a.ProjectRoot, a.Error, a.Context, model, + ) + task = s.prependHistory(a.SessionID, "debug", task) + + if s.cfg.ExecutorFn == nil { + return nil, fmt.Errorf("no executor configured") + } + result, err := s.cfg.ExecutorFn(ctx, iexec.Request{ + SkillPrompt: s.cfg.SkillPrompt, + TaskPrompt: task, + Model: model, + Tools: "Read,Bash", + }) + if err != nil { + return nil, err + } + b, err := json.Marshal(result) + if err != nil { + return nil, fmt.Errorf("marshal result: %w", err) + } + return b, nil +} + +func (s *Skill) prependHistory(sessionID, currentPhase, 
task string) string { + if sessionID == "" || s.cfg.SessionsDir == "" { + return task + } + entries, err := session.Read(s.cfg.SessionsDir, sessionID) + if err != nil || len(entries) == 0 { + return task + } + history := session.FormatHistory(entries, currentPhase) + if history == "" { + return task + } + return history + "\n---\n\n" + task +} diff --git a/internal/skills/debug/handlers_test.go b/internal/skills/debug/handlers_test.go new file mode 100644 index 0000000..ddf0c4b --- /dev/null +++ b/internal/skills/debug/handlers_test.go @@ -0,0 +1,61 @@ +// internal/skills/debug/handlers_test.go +package debug_test + +import ( + "context" + "encoding/json" + "testing" + + iexec "github.com/mathiasbq/supervisor/internal/exec" + "github.com/mathiasbq/supervisor/internal/skills/debug" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestDebugToolRegistered(t *testing.T) { + sk := debug.New(debug.Config{SkillPrompt: "debug rules"}) + names := make([]string, 0) + for _, tool := range sk.Tools() { + names = append(names, tool.Name) + } + assert.Contains(t, names, "debug") +} + +func TestDebugRequiresProjectRoot(t *testing.T) { + sk := debug.New(debug.Config{SkillPrompt: "d"}) + _, err := sk.Handle(context.Background(), "debug", json.RawMessage(`{"error":"panic: nil pointer"}`)) + assert.ErrorContains(t, err, "project_root") +} + +func TestDebugRequiresError(t *testing.T) { + sk := debug.New(debug.Config{SkillPrompt: "d"}) + _, err := sk.Handle(context.Background(), "debug", json.RawMessage(`{"project_root":"/tmp"}`)) + assert.ErrorContains(t, err, "error") +} + +func TestDebugCallsExecutor(t *testing.T) { + called := false + var capturedTask string + fakeFn := func(_ context.Context, req iexec.Request) (iexec.Result, error) { + called = true + capturedTask = req.TaskPrompt + return iexec.Result{ + Status: "pass", Phase: "debug", Skill: "debug", + RunnerOutput: "HYPOTHESIS 1 (likelihood: high): nil map access\nVERIFY: go test ./... 
→ expected: panic line reference", + Verified: false, ModelUsed: "self", Message: "3 hypotheses for: panic nil pointer at foo.go:42", + }, nil + } + + sk := debug.New(debug.Config{SkillPrompt: "debug rules", ExecutorFn: fakeFn, SessionsDir: t.TempDir()}) + out, err := sk.Handle(context.Background(), "debug", json.RawMessage( + `{"project_root":"/tmp/proj","error":"panic: nil pointer dereference at foo.go:42","context":"occurs on startup"}`, + )) + require.NoError(t, err) + assert.True(t, called) + assert.Contains(t, capturedTask, "panic: nil pointer dereference") + assert.Contains(t, capturedTask, "occurs on startup") + + var result iexec.Result + require.NoError(t, json.Unmarshal(out, &result)) + assert.Equal(t, "debug", result.Phase) +} diff --git a/internal/skills/debug/skill.go b/internal/skills/debug/skill.go new file mode 100644 index 0000000..8dcc083 --- /dev/null +++ b/internal/skills/debug/skill.go @@ -0,0 +1,55 @@ +// internal/skills/debug/skill.go +package debug + +import ( + "context" + "encoding/json" + + iexec "github.com/mathiasbq/supervisor/internal/exec" + "github.com/mathiasbq/supervisor/internal/registry" +) + +// ExecutorFn is the function signature for running a worker subprocess. +type ExecutorFn func(ctx context.Context, req iexec.Request) (iexec.Result, error) + +// Config holds dependencies for the debug skill. +type Config struct { + SkillPrompt string + DefaultModel string + ExecutorFn ExecutorFn + SessionsDir string +} + +// Skill implements the debug MCP tool. +type Skill struct{ cfg Config } + +// New creates a new debug Skill. +func New(cfg Config) *Skill { return &Skill{cfg: cfg} } + +// Name returns the skill identifier. +func (s *Skill) Name() string { return "debug" } + +// Tools returns the MCP tool definitions for this skill. 
+func (s *Skill) Tools() []registry.ToolDef {
+	// All five arguments are plain JSON strings, so a single type
+	// descriptor is shared by every property.
+	stringType := map[string]any{"type": "string"}
+	properties := make(map[string]any, 5)
+	for _, field := range []string{"project_root", "error", "context", "model", "session_id"} {
+		properties[field] = stringType
+	}
+
+	// The Marshal error is discarded: the value contains only maps,
+	// slices and strings.
+	inputSchema, _ := json.Marshal(map[string]any{
+		"type":       "object",
+		"required":   []string{"project_root", "error"},
+		"properties": properties,
+	})
+
+	return []registry.ToolDef{{
+		Name:        "debug",
+		Description: "Analyse an error and return 3-5 hypotheses ordered by likelihood, each with a concrete verification step.",
+		InputSchema: json.RawMessage(inputSchema),
+	}}
+}