From 8cff57009a7ccaeca75d9e30f6717d61ee1ea632 Mon Sep 17 00:00:00 2001
From: Mathias Bergqvist <mthbqv@gmail.com>
Date: Sun, 19 Apr 2026 11:29:58 +0200
Subject: [PATCH] feat(debug): add debug MCP skill with hypothesis generation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implements the debug skill following the same pattern as review. The skill
accepts project_root + error (+ optional context/model/session_id), prepends
session history, and calls the executor to produce 3-5 ordered hypotheses —
diagnosis only, no fixes.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 cmd/supervisor/main.go                 | 13 +++++
 config/supervisor/debug.md             | 31 ++++++++++
 internal/skills/debug/handlers.go      | 80 ++++++++++++++++++++++++++
 internal/skills/debug/handlers_test.go | 61 ++++++++++++++++++++
 internal/skills/debug/skill.go         | 55 ++++++++++++++++++
 5 files changed, 240 insertions(+)
 create mode 100644 config/supervisor/debug.md
 create mode 100644 internal/skills/debug/handlers.go
 create mode 100644 internal/skills/debug/handlers_test.go
 create mode 100644 internal/skills/debug/skill.go
diff --git a/cmd/supervisor/main.go b/cmd/supervisor/main.go
index 8749e06..9a6b325 100644
--- a/cmd/supervisor/main.go
+++ b/cmd/supervisor/main.go
@@ -13,6 +13,7 @@ import (
 	"github.com/mathiasbq/supervisor/internal/skills/brain"
 	"github.com/mathiasbq/supervisor/internal/skills/org"
 	"github.com/mathiasbq/supervisor/internal/skills/retrospective"
+	skilldebug "github.com/mathiasbq/supervisor/internal/skills/debug"
 	"github.com/mathiasbq/supervisor/internal/skills/review"
 	"github.com/mathiasbq/supervisor/internal/skills/sessionlog"
 	"github.com/mathiasbq/supervisor/internal/skills/tdd"
@@ -58,6 +59,12 @@ func main() {
 		os.Exit(1)
 	}
 
+	debugPrompt, err := os.ReadFile(cfg.ConfigDir + "/debug.md")
+	if err != nil {
+		logger.Error("read debug.md", "path", cfg.ConfigDir+"/debug.md", "err", err)
+		os.Exit(1)
+	}
+
 	executor := iexec.New(iexec.Config{
 		SystemPrompt:   string(systemPrompt),
 		LiteLLMBaseURL: cfg.LiteLLMBaseURL,
@@ -97,6 +104,12 @@ func main() {
 		ExecutorFn:   executor.Run,
 		SessionsDir:  cfg.SessionsDir,
 	}))
+	reg.Register(skilldebug.New(skilldebug.Config{
+		SkillPrompt:  string(debugPrompt),
+		DefaultModel: models.Resolve("debug", ""),
+		ExecutorFn:   executor.Run,
+		SessionsDir:  cfg.SessionsDir,
+	}))
 
 	srv := mcp.NewServer(reg)
 	mux := http.NewServeMux()
diff --git a/config/supervisor/debug.md b/config/supervisor/debug.md
new file mode 100644
index 0000000..c800dc5
--- /dev/null
+++ b/config/supervisor/debug.md
@@ -0,0 +1,31 @@
+# Debug Discipline
+
+You are a systematic debugger. Form hypotheses and diagnose; do not suggest fixes.
+
+## Iron laws
+1. Never suggest "try X and see what happens" — every hypothesis must have a specific expected outcome if correct
+2. Generate between 3 and 5 hypotheses, ordered by likelihood (most likely first)
+3. Never fix the bug — diagnose only; the caller decides what to do with the hypotheses
+
+## Output contract
+Return JSON result with:
+- `status`: "pass" (hypotheses generated) or "error" (error too ambiguous to analyse)
+- `phase`: "debug"
+- `skill`: "debug"
+- `file_path`: the file most relevant to the error (read it)
+- `runner_output`: your hypotheses, formatted as:
+  ```
+  HYPOTHESIS 1 (likelihood: high): <mechanism>
+  VERIFY: <exact command or file to check> → expected if correct: <specific output>
+
+  HYPOTHESIS 2 (likelihood: medium): <mechanism>
+  VERIFY: <exact command or file to check> → expected if correct: <specific output>
+  ```
+- `verified`: false — verification is the caller's job
+- `message`: "N hypotheses for: <one-line error summary>"
+
+## Rules
+1. Read the error and any context files provided before forming hypotheses
+2. Identify the failure mode first — what actually went wrong, not just what the error says
+3. For each hypothesis: name the mechanism, explain why it would produce this exact error, give a concrete verification command with expected output
+4. If the error is clearly a typo or trivial mistake, still form 3 hypotheses — surface the most likely cause as #1
diff --git a/internal/skills/debug/handlers.go b/internal/skills/debug/handlers.go
new file mode 100644
index 0000000..05b63c0
--- /dev/null
+++ b/internal/skills/debug/handlers.go
@@ -0,0 +1,80 @@
+// internal/skills/debug/handlers.go
+package debug
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+
+	iexec "github.com/mathiasbq/supervisor/internal/exec"
+	"github.com/mathiasbq/supervisor/internal/session"
+)
+
+type debugArgs struct {
+	ProjectRoot string `json:"project_root"` // required; Handle rejects an empty value
+	Error       string `json:"error"`        // required; Handle rejects an empty value
+	Context     string `json:"context"`      // optional; copied verbatim into the task prompt
+	Model       string `json:"model"`        // optional; Handle falls back to Config.DefaultModel when empty
+	SessionID   string `json:"session_id"`   // optional; names the session whose history prependHistory looks up
+}
+
+// Handle serves the "debug" tool: it validates args, builds the task prompt (prefixed with
+// session history when there is any) and returns the executor's result encoded as JSON.
+// Unknown tools, unparsable or incomplete args, and a missing executor are reported as errors.
+func (s *Skill) Handle(ctx context.Context, tool string, args json.RawMessage) (json.RawMessage, error) {
+	if tool != "debug" {
+		return nil, fmt.Errorf("unknown tool: %s", tool)
+	}
+	var a debugArgs
+	if err := json.Unmarshal(args, &a); err != nil {
+		return nil, fmt.Errorf("parse args: %w", err)
+	}
+	switch {
+	case a.ProjectRoot == "":
+		return nil, fmt.Errorf("project_root is required")
+	case a.Error == "":
+		return nil, fmt.Errorf("error is required")
+	case s.cfg.ExecutorFn == nil:
+		// Checked before any session file is read so a misconfigured skill fails fast.
+		return nil, fmt.Errorf("no executor configured")
+	}
+
+	model := a.Model
+	if model == "" {
+		model = s.cfg.DefaultModel
+	}
+
+	task := fmt.Sprintf(
+		"phase: debug\nproject_root: %s\nerror: %s\ncontext: %s\nmodel: %s",
+		a.ProjectRoot, a.Error, a.Context, model,
+	)
+	result, err := s.cfg.ExecutorFn(ctx, iexec.Request{
+		SkillPrompt: s.cfg.SkillPrompt,
+		TaskPrompt:  s.prependHistory(a.SessionID, "debug", task),
+		Model:       model,
+		Tools:       "Read,Bash",
+	})
+	if err != nil {
+		return nil, err
+	}
+	b, err := json.Marshal(result)
+	if err != nil {
+		return nil, fmt.Errorf("marshal result: %w", err)
+	}
+	return b, nil
+}
+
+// prependHistory prefixes task with the session's formatted history and a "---" rule, or returns task as-is when there is none.
+func (s *Skill) prependHistory(sessionID, currentPhase, task string) string {
+	if s.cfg.SessionsDir == "" || sessionID == "" {
+		return task
+	}
+	records, err := session.Read(s.cfg.SessionsDir, sessionID)
+	if len(records) == 0 || err != nil {
+		return task
+	}
+	if past := session.FormatHistory(records, currentPhase); past != "" {
+		return past + "\n---\n\n" + task
+	}
+	return task
+}
diff --git a/internal/skills/debug/handlers_test.go b/internal/skills/debug/handlers_test.go
new file mode 100644
index 0000000..ddf0c4b
--- /dev/null
+++ b/internal/skills/debug/handlers_test.go
@@ -0,0 +1,61 @@
+// internal/skills/debug/handlers_test.go
+package debug_test
+
+import (
+	"context"
+	"encoding/json"
+	"testing"
+
+	iexec "github.com/mathiasbq/supervisor/internal/exec"
+	"github.com/mathiasbq/supervisor/internal/skills/debug"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func TestDebugToolRegistered(t *testing.T) {
+	sk := debug.New(debug.Config{SkillPrompt: "debug rules"})
+	names := make([]string, 0)
+	for _, tool := range sk.Tools() {
+		names = append(names, tool.Name)
+	}
+	assert.Contains(t, names, "debug")
+}
+
+// TestDebugRequiresProjectRoot checks that a call without project_root is rejected.
+func TestDebugRequiresProjectRoot(t *testing.T) {
+	args := json.RawMessage(`{"error":"panic: nil pointer"}`)
+	_, err := debug.New(debug.Config{SkillPrompt: "d"}).Handle(context.Background(), "debug", args)
+	assert.ErrorContains(t, err, "project_root")
+}
+
+// TestDebugRequiresError checks that a call without the error argument is rejected with the dedicated message.
+func TestDebugRequiresError(t *testing.T) {
+	sk := debug.New(debug.Config{SkillPrompt: "d"})
+	_, err := sk.Handle(context.Background(), "debug", json.RawMessage(`{"project_root":"/tmp"}`))
+	assert.ErrorContains(t, err, "error is required")
+}
+
+// TestDebugCallsExecutor checks that a valid call invokes the executor with the error and
+// context in the task prompt, and that the executor's result comes back as JSON.
+func TestDebugCallsExecutor(t *testing.T) {
+	var invoked bool
+	var gotTask string
+	stub := func(_ context.Context, req iexec.Request) (iexec.Result, error) {
+		invoked, gotTask = true, req.TaskPrompt
+		return iexec.Result{
+			Status: "pass", Phase: "debug", Skill: "debug",
+			RunnerOutput: "HYPOTHESIS 1 (likelihood: high): nil map access\nVERIFY: go test ./... → expected: panic line reference",
+			Verified:     false, ModelUsed: "self", Message: "3 hypotheses for: panic nil pointer at foo.go:42",
+		}, nil
+	}
+	sk := debug.New(debug.Config{SkillPrompt: "debug rules", ExecutorFn: stub, SessionsDir: t.TempDir()})
+
+	const args = `{"project_root":"/tmp/proj","error":"panic: nil pointer dereference at foo.go:42","context":"occurs on startup"}`
+	out, err := sk.Handle(context.Background(), "debug", json.RawMessage(args))
+	require.NoError(t, err)
+	assert.True(t, invoked)
+	assert.Contains(t, gotTask, "panic: nil pointer dereference")
+	assert.Contains(t, gotTask, "occurs on startup")
+
+	var decoded iexec.Result
+	require.NoError(t, json.Unmarshal(out, &decoded))
+	assert.Equal(t, "debug", decoded.Phase)
+}
diff --git a/internal/skills/debug/skill.go b/internal/skills/debug/skill.go
new file mode 100644
index 0000000..8dcc083
--- /dev/null
+++ b/internal/skills/debug/skill.go
@@ -0,0 +1,55 @@
+// internal/skills/debug/skill.go
+package debug
+
+import (
+	"context"
+	"encoding/json"
+
+	iexec "github.com/mathiasbq/supervisor/internal/exec"
+	"github.com/mathiasbq/supervisor/internal/registry"
+)
+
+// ExecutorFn is the function signature for running a worker subprocess; Handle calls it with the assembled request.
+type ExecutorFn func(ctx context.Context, req iexec.Request) (iexec.Result, error)
+
+// Config holds dependencies for the debug skill.
+type Config struct {
+	SkillPrompt  string     // passed to the executor as Request.SkillPrompt
+	DefaultModel string     // model used when a call does not specify one
+	ExecutorFn   ExecutorFn // runs the request; Handle returns an error when it is nil
+	SessionsDir  string     // directory session history is read from; empty disables history
+}
+
+// Skill implements the debug MCP tool; create one with New.
+type Skill struct{ cfg Config }
+
+// New creates a new debug Skill that keeps its own copy of cfg.
+func New(cfg Config) *Skill { return &Skill{cfg: cfg} }
+
+// Name returns the skill identifier, the constant "debug".
+func (s *Skill) Name() string { return "debug" }
+
+// Tools describes the single "debug" tool: its input is a JSON-Schema object whose five
+// properties are all strings and which requires project_root and error.
+func (s *Skill) Tools() []registry.ToolDef {
+	stringProp := map[string]any{"type": "string"}
+	// The literal holds only strings, slices and maps, so the Marshal error is deliberately dropped.
+	inputSchema, _ := json.Marshal(map[string]any{
+		"type":     "object",
+		"required": []string{"project_root", "error"},
+		"properties": map[string]any{
+			"project_root": stringProp,
+			"error":        stringProp,
+			"context":      stringProp,
+			"model":        stringProp,
+			"session_id":   stringProp,
+		},
+	})
+	return []registry.ToolDef{
+		{
+			Name:        "debug",
+			Description: "Analyse an error and return 3-5 hypotheses ordered by likelihood, each with a concrete verification step.",
+			InputSchema: json.RawMessage(inputSchema),
+		},
+	}
+}