The routing decision is about reasoning capacity, not cost or provider. Fast model (koala/qwen35-9b-fast) handles high-pass-rate calls; thinking model (iguana/gemma4-26b) handles low-pass-rate calls. Removes the implicit Anthropic dependency from the routing pod — both models go through LiteLLM. Renames: HYPERGUILD_LOCAL_MODEL → HYPERGUILD_FAST_MODEL, HYPERGUILD_CLAUDE_MODEL → HYPERGUILD_THINKING_MODEL, Router.LocalModel → FastModel, Router.ClaudeModel → ThinkingModel, log decision "claude_fallback" → "thinking_fallback". Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
124 lines
4.1 KiB
Go
124 lines
4.1 KiB
Go
package main
|
|
|
|
// The internal/skills/{debug,retrospective,review,trainer} packages imported
// below are also imported by cmd/supervisor. Plan 7 (supervisor retirement)
// MUST NOT delete these four packages — the routing pod is their second
// consumer. Plan 7 deletes only internal/skills/{tdd,spec,tier} (the skills
// that don't route to local), the supervisor binary, and supervisor manifests.
// See docs/superpowers/specs/2026-05-04-mode-2-routing-pod-design.md (Constraints).
|
|
|
|
import (
|
|
"context"
|
|
"log/slog"
|
|
"net/http"
|
|
"os"
|
|
"time"
|
|
|
|
"github.com/mathiasbq/supervisor/internal/config"
|
|
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
|
"github.com/mathiasbq/supervisor/internal/mcp"
|
|
"github.com/mathiasbq/supervisor/internal/registry"
|
|
"github.com/mathiasbq/supervisor/internal/routing"
|
|
"github.com/mathiasbq/supervisor/internal/skills/debug"
|
|
"github.com/mathiasbq/supervisor/internal/skills/retrospective"
|
|
"github.com/mathiasbq/supervisor/internal/skills/review"
|
|
"github.com/mathiasbq/supervisor/internal/skills/trainer"
|
|
)
|
|
|
|
func main() {
|
|
logger := slog.New(slog.NewTextHandler(os.Stderr, nil))
|
|
slog.SetDefault(logger)
|
|
|
|
cfg, err := config.LoadRouting()
|
|
if err != nil {
|
|
logger.Error("config load failed", "err", err)
|
|
os.Exit(1)
|
|
}
|
|
|
|
configDir := envOr("SUPERVISOR_CONFIG_DIR", "/app/config/supervisor")
|
|
mustRead := func(path string) string {
|
|
b, err := os.ReadFile(configDir + "/" + path)
|
|
if err != nil {
|
|
logger.Error("read prompt failed", "path", path, "err", err)
|
|
os.Exit(1)
|
|
}
|
|
return string(b)
|
|
}
|
|
|
|
llm := iexec.NewLiteLLM(cfg.LiteLLMBaseURL, cfg.LiteLLMAPIKey, 0)
|
|
|
|
router := &routing.Router{
|
|
Fetcher: routing.NewFetcher(cfg.BrainURL, "7d", time.Duration(cfg.PassRateTTLSeconds)*time.Second),
|
|
Logger: routing.NewLogger(cfg.BrainURL),
|
|
Policy: routing.Policy{Floor: cfg.RouteLocalFloor, Ceil: cfg.RouteLocalCeil},
|
|
FastModel: cfg.FastModel,
|
|
ThinkingModel: cfg.ThinkingModel,
|
|
Complete: llm.Complete,
|
|
}
|
|
|
|
// Skill packages call CompleteFunc(ctx, model, system, user) — no session_id
|
|
// or project_root in the signature. Rather than modifying every skill's API
|
|
// (and inflating Plan 6's blast radius), the routing pod logs every decision
|
|
// under a fixed session_id "_routing". Operators query
|
|
// `GET /pass-rate?skill=_routing&window=...` to inspect routing health.
|
|
const routingSessionID = "_routing"
|
|
wrap := func(skillName string) routing.CompleteFunc {
|
|
return func(ctx context.Context, _, system, user string) (string, int64, error) {
|
|
// The model param is ignored: the router picks the model based on policy.
|
|
return router.Run(ctx, routing.RunInput{
|
|
Skill: skillName,
|
|
System: system,
|
|
User: user,
|
|
SessionID: routingSessionID,
|
|
ProjectRoot: "",
|
|
})
|
|
}
|
|
}
|
|
|
|
reg := registry.New()
|
|
reg.Register(review.New(review.Config{
|
|
SkillPrompt: mustRead("review.md"),
|
|
DefaultModel: cfg.FastModel,
|
|
CompleteFunc: review.CompleteFunc(wrap("review")),
|
|
}))
|
|
reg.Register(debug.New(debug.Config{
|
|
SkillPrompt: mustRead("debug.md"),
|
|
DefaultModel: cfg.FastModel,
|
|
CompleteFunc: debug.CompleteFunc(wrap("debug")),
|
|
}))
|
|
reg.Register(retrospective.New(retrospective.Config{
|
|
SkillPrompt: mustRead("retrospective.md"),
|
|
DefaultModel: cfg.FastModel,
|
|
CompleteFunc: retrospective.CompleteFunc(wrap("retrospective")),
|
|
}))
|
|
reg.Register(trainer.New(trainer.Config{
|
|
ReaderPrompt: mustRead("trainer-reader.md"),
|
|
WriterPrompt: mustRead("trainer-writer.md"),
|
|
DefaultModel: cfg.FastModel,
|
|
CompleteFunc: trainer.CompleteFunc(wrap("trainer")),
|
|
}))
|
|
|
|
srv := mcp.NewServer(reg, cfg.MCPAuthToken)
|
|
mux := http.NewServeMux()
|
|
mux.Handle("/mcp", srv)
|
|
mux.HandleFunc("/healthz", func(w http.ResponseWriter, _ *http.Request) {
|
|
w.WriteHeader(http.StatusOK)
|
|
})
|
|
|
|
addr := ":" + cfg.Port
|
|
logger.Info("routing pod starting", "addr", addr,
|
|
"fast", cfg.FastModel, "thinking", cfg.ThinkingModel,
|
|
"floor", cfg.RouteLocalFloor, "ceil", cfg.RouteLocalCeil)
|
|
if err := http.ListenAndServe(addr, mux); err != nil { //nolint:gosec
|
|
logger.Error("server stopped", "err", err)
|
|
os.Exit(1)
|
|
}
|
|
}
|
|
|
|
// envOr looks up the environment variable named key and returns its value.
// When the variable is unset or set to the empty string, def is returned
// instead.
func envOr(key, def string) string {
	value := os.Getenv(key)
	if value == "" {
		return def
	}
	return value
}
|