Merge pull request 'refactor(routing): rename local/claude to fast/thinking model pair' (#4) from agent/thinking-fast-routing into main
This commit was merged in pull request #4.
This commit is contained in:
@@ -36,6 +36,9 @@ These rules apply to every task across every project, regardless of harness.
|
|||||||
4. **Goal-driven execution.** Define clear success criteria up front for every task.
|
4. **Goal-driven execution.** Define clear success criteria up front for every task.
|
||||||
Loop — implement, verify, refine — until those criteria are met. Don't claim
|
Loop — implement, verify, refine — until those criteria are met. Don't claim
|
||||||
completion without evidence (tests pass, command output, observed behavior).
|
completion without evidence (tests pass, command output, observed behavior).
|
||||||
|
5. **Branch-per-task for multi-agent repos.** When another agent may be active on
|
||||||
|
the same repo, create a branch (`agent/<description>`), commit there, and open a
|
||||||
|
PR. Do not merge without explicit instruction from Mathias.
|
||||||
|
|
||||||
## Default stack
|
## Default stack
|
||||||
|
|
||||||
|
|||||||
@@ -41,6 +41,9 @@ These rules apply to every task across every project, regardless of harness.
|
|||||||
4. **Goal-driven execution.** Define clear success criteria up front for every task.
|
4. **Goal-driven execution.** Define clear success criteria up front for every task.
|
||||||
Loop — implement, verify, refine — until those criteria are met. Don't claim
|
Loop — implement, verify, refine — until those criteria are met. Don't claim
|
||||||
completion without evidence (tests pass, command output, observed behavior).
|
completion without evidence (tests pass, command output, observed behavior).
|
||||||
|
5. **Branch-per-task for multi-agent repos.** When another agent may be active on
|
||||||
|
the same repo, create a branch (`agent/<description>`), commit there, and open a
|
||||||
|
PR. Do not merge without explicit instruction from Mathias.
|
||||||
|
|
||||||
## Default stack
|
## Default stack
|
||||||
|
|
||||||
|
|||||||
@@ -39,6 +39,9 @@ These rules apply to every task across every project, regardless of harness.
|
|||||||
4. **Goal-driven execution.** Define clear success criteria up front for every task.
|
4. **Goal-driven execution.** Define clear success criteria up front for every task.
|
||||||
Loop — implement, verify, refine — until those criteria are met. Don't claim
|
Loop — implement, verify, refine — until those criteria are met. Don't claim
|
||||||
completion without evidence (tests pass, command output, observed behavior).
|
completion without evidence (tests pass, command output, observed behavior).
|
||||||
|
5. **Branch-per-task for multi-agent repos.** When another agent may be active on
|
||||||
|
the same repo, create a branch (`agent/<description>`), commit there, and open a
|
||||||
|
PR. Do not merge without explicit instruction from Mathias.
|
||||||
|
|
||||||
## Default stack
|
## Default stack
|
||||||
|
|
||||||
|
|||||||
@@ -36,6 +36,9 @@ These rules apply to every task across every project, regardless of harness.
|
|||||||
4. **Goal-driven execution.** Define clear success criteria up front for every task.
|
4. **Goal-driven execution.** Define clear success criteria up front for every task.
|
||||||
Loop — implement, verify, refine — until those criteria are met. Don't claim
|
Loop — implement, verify, refine — until those criteria are met. Don't claim
|
||||||
completion without evidence (tests pass, command output, observed behavior).
|
completion without evidence (tests pass, command output, observed behavior).
|
||||||
|
5. **Branch-per-task for multi-agent repos.** When another agent may be active on
|
||||||
|
the same repo, create a branch (`agent/<description>`), commit there, and open a
|
||||||
|
PR. Do not merge without explicit instruction from Mathias.
|
||||||
|
|
||||||
## Default stack
|
## Default stack
|
||||||
|
|
||||||
|
|||||||
10
README.md
10
README.md
@@ -116,13 +116,13 @@ The supervisor probes connectivity at call time:
|
|||||||
| `ROUTING_PORT` | `3210` | Routing pod's listen port |
|
| `ROUTING_PORT` | `3210` | Routing pod's listen port |
|
||||||
| `ROUTING_MCP_TOKEN` | — | Optional bearer token for the routing MCP HTTP endpoint |
|
| `ROUTING_MCP_TOKEN` | — | Optional bearer token for the routing MCP HTTP endpoint |
|
||||||
| `BRAIN_URL` | `http://ingestion.supervisor:3300` | Routing pod → brain (in-cluster) |
|
| `BRAIN_URL` | `http://ingestion.supervisor:3300` | Routing pod → brain (in-cluster) |
|
||||||
| `HYPERGUILD_LOCAL_MODEL` | `qwen35` | Local model for routed-to-local skill calls |
|
| `HYPERGUILD_FAST_MODEL` | `koala/qwen35-9b-fast` | Fast model for high-pass-rate skill calls |
|
||||||
| `HYPERGUILD_CLAUDE_MODEL` | `claude-sonnet-4-6` | Claude model for routed-to-Claude skill calls |
|
| `HYPERGUILD_THINKING_MODEL` | `iguana/gemma4-26b` | Thinking model for low-pass-rate skill calls |
|
||||||
| `HYPERGUILD_ROUTE_LOCAL_FLOOR` | `0.90` | At/above pass rate, route to local |
|
| `HYPERGUILD_ROUTE_LOCAL_FLOOR` | `0.90` | At/above pass rate, route to fast model |
|
||||||
| `HYPERGUILD_ROUTE_LOCAL_CEIL` | `0.70` | Below pass rate, route to Claude. Between CEIL and FLOOR is the sample band. |
|
| `HYPERGUILD_ROUTE_LOCAL_CEIL` | `0.70` | Below pass rate, route to thinking model. Between CEIL and FLOOR is the sample band. |
|
||||||
| `HYPERGUILD_PASS_RATE_TTL_SECONDS` | `60` | Per-skill pass-rate cache TTL |
|
| `HYPERGUILD_PASS_RATE_TTL_SECONDS` | `60` | Per-skill pass-rate cache TTL |
|
||||||
|
|
||||||
> **Operator note:** LiteLLM at `LITELLM_BASE_URL` must register both `HYPERGUILD_LOCAL_MODEL` and `HYPERGUILD_CLAUDE_MODEL` for routing to do useful work. If a model is missing, LiteLLM returns 4xx, the routing pod's local route fails, the fail-open retry on Claude likely also fails (since both are missing), and the only signal is `final_status: "fail"` on `_routing` entries in the brain.
|
> **Operator note:** LiteLLM at `LITELLM_BASE_URL` must register both `HYPERGUILD_FAST_MODEL` and `HYPERGUILD_THINKING_MODEL` for routing to do useful work. If a model is missing, LiteLLM returns 4xx, the routing pod's fast route fails, the fail-open retry on the thinking model likely also fails (since both are missing), and the only signal is `final_status: "fail"` on `_routing` entries in the brain.
|
||||||
|
|
||||||
## Phase 2 (planned)
|
## Phase 2 (planned)
|
||||||
|
|
||||||
|
|||||||
@@ -48,12 +48,12 @@ func main() {
|
|||||||
llm := iexec.NewLiteLLM(cfg.LiteLLMBaseURL, cfg.LiteLLMAPIKey, 0)
|
llm := iexec.NewLiteLLM(cfg.LiteLLMBaseURL, cfg.LiteLLMAPIKey, 0)
|
||||||
|
|
||||||
router := &routing.Router{
|
router := &routing.Router{
|
||||||
Fetcher: routing.NewFetcher(cfg.BrainURL, "7d", time.Duration(cfg.PassRateTTLSeconds)*time.Second),
|
Fetcher: routing.NewFetcher(cfg.BrainURL, "7d", time.Duration(cfg.PassRateTTLSeconds)*time.Second),
|
||||||
Logger: routing.NewLogger(cfg.BrainURL),
|
Logger: routing.NewLogger(cfg.BrainURL),
|
||||||
Policy: routing.Policy{Floor: cfg.RouteLocalFloor, Ceil: cfg.RouteLocalCeil},
|
Policy: routing.Policy{Floor: cfg.RouteLocalFloor, Ceil: cfg.RouteLocalCeil},
|
||||||
LocalModel: cfg.LocalModel,
|
FastModel: cfg.FastModel,
|
||||||
ClaudeModel: cfg.ClaudeModel,
|
ThinkingModel: cfg.ThinkingModel,
|
||||||
Complete: llm.Complete,
|
Complete: llm.Complete,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Skill packages call CompleteFunc(ctx, model, system, user) — no session_id
|
// Skill packages call CompleteFunc(ctx, model, system, user) — no session_id
|
||||||
@@ -78,23 +78,23 @@ func main() {
|
|||||||
reg := registry.New()
|
reg := registry.New()
|
||||||
reg.Register(review.New(review.Config{
|
reg.Register(review.New(review.Config{
|
||||||
SkillPrompt: mustRead("review.md"),
|
SkillPrompt: mustRead("review.md"),
|
||||||
DefaultModel: cfg.LocalModel,
|
DefaultModel: cfg.FastModel,
|
||||||
CompleteFunc: review.CompleteFunc(wrap("review")),
|
CompleteFunc: review.CompleteFunc(wrap("review")),
|
||||||
}))
|
}))
|
||||||
reg.Register(debug.New(debug.Config{
|
reg.Register(debug.New(debug.Config{
|
||||||
SkillPrompt: mustRead("debug.md"),
|
SkillPrompt: mustRead("debug.md"),
|
||||||
DefaultModel: cfg.LocalModel,
|
DefaultModel: cfg.FastModel,
|
||||||
CompleteFunc: debug.CompleteFunc(wrap("debug")),
|
CompleteFunc: debug.CompleteFunc(wrap("debug")),
|
||||||
}))
|
}))
|
||||||
reg.Register(retrospective.New(retrospective.Config{
|
reg.Register(retrospective.New(retrospective.Config{
|
||||||
SkillPrompt: mustRead("retrospective.md"),
|
SkillPrompt: mustRead("retrospective.md"),
|
||||||
DefaultModel: cfg.LocalModel,
|
DefaultModel: cfg.FastModel,
|
||||||
CompleteFunc: retrospective.CompleteFunc(wrap("retrospective")),
|
CompleteFunc: retrospective.CompleteFunc(wrap("retrospective")),
|
||||||
}))
|
}))
|
||||||
reg.Register(trainer.New(trainer.Config{
|
reg.Register(trainer.New(trainer.Config{
|
||||||
ReaderPrompt: mustRead("trainer-reader.md"),
|
ReaderPrompt: mustRead("trainer-reader.md"),
|
||||||
WriterPrompt: mustRead("trainer-writer.md"),
|
WriterPrompt: mustRead("trainer-writer.md"),
|
||||||
DefaultModel: cfg.LocalModel,
|
DefaultModel: cfg.FastModel,
|
||||||
CompleteFunc: trainer.CompleteFunc(wrap("trainer")),
|
CompleteFunc: trainer.CompleteFunc(wrap("trainer")),
|
||||||
}))
|
}))
|
||||||
|
|
||||||
@@ -107,7 +107,7 @@ func main() {
|
|||||||
|
|
||||||
addr := ":" + cfg.Port
|
addr := ":" + cfg.Port
|
||||||
logger.Info("routing pod starting", "addr", addr,
|
logger.Info("routing pod starting", "addr", addr,
|
||||||
"local", cfg.LocalModel, "claude", cfg.ClaudeModel,
|
"fast", cfg.FastModel, "thinking", cfg.ThinkingModel,
|
||||||
"floor", cfg.RouteLocalFloor, "ceil", cfg.RouteLocalCeil)
|
"floor", cfg.RouteLocalFloor, "ceil", cfg.RouteLocalCeil)
|
||||||
if err := http.ListenAndServe(addr, mux); err != nil { //nolint:gosec
|
if err := http.ListenAndServe(addr, mux); err != nil { //nolint:gosec
|
||||||
logger.Error("server stopped", "err", err)
|
logger.Error("server stopped", "err", err)
|
||||||
|
|||||||
@@ -14,8 +14,8 @@ type RoutingConfig struct {
|
|||||||
LiteLLMBaseURL string // LITELLM_BASE_URL, default http://piguard:4000
|
LiteLLMBaseURL string // LITELLM_BASE_URL, default http://piguard:4000
|
||||||
LiteLLMAPIKey string // LITELLM_API_KEY
|
LiteLLMAPIKey string // LITELLM_API_KEY
|
||||||
BrainURL string // BRAIN_URL, default http://ingestion.supervisor:3300
|
BrainURL string // BRAIN_URL, default http://ingestion.supervisor:3300
|
||||||
LocalModel string // HYPERGUILD_LOCAL_MODEL, default qwen35
|
FastModel string // HYPERGUILD_FAST_MODEL, default koala/qwen35-9b-fast
|
||||||
ClaudeModel string // HYPERGUILD_CLAUDE_MODEL, default claude-sonnet-4-6
|
ThinkingModel string // HYPERGUILD_THINKING_MODEL, default iguana/gemma4-26b
|
||||||
// RouteLocalFloor and RouteLocalCeil intentionally invert the usual
|
// RouteLocalFloor and RouteLocalCeil intentionally invert the usual
|
||||||
// floor < ceil mathematical convention: Floor (default 0.90) is the
|
// floor < ceil mathematical convention: Floor (default 0.90) is the
|
||||||
// UPPER boundary — at/above it, always route local; Ceil (default 0.70)
|
// UPPER boundary — at/above it, always route local; Ceil (default 0.70)
|
||||||
@@ -34,8 +34,8 @@ func LoadRouting() (RoutingConfig, error) {
|
|||||||
LiteLLMBaseURL: envOr("LITELLM_BASE_URL", "http://piguard:4000"),
|
LiteLLMBaseURL: envOr("LITELLM_BASE_URL", "http://piguard:4000"),
|
||||||
LiteLLMAPIKey: os.Getenv("LITELLM_API_KEY"),
|
LiteLLMAPIKey: os.Getenv("LITELLM_API_KEY"),
|
||||||
BrainURL: envOr("BRAIN_URL", "http://ingestion.supervisor:3300"),
|
BrainURL: envOr("BRAIN_URL", "http://ingestion.supervisor:3300"),
|
||||||
LocalModel: envOr("HYPERGUILD_LOCAL_MODEL", "qwen35"),
|
FastModel: envOr("HYPERGUILD_FAST_MODEL", "koala/qwen35-9b-fast"),
|
||||||
ClaudeModel: envOr("HYPERGUILD_CLAUDE_MODEL", "claude-sonnet-4-6"),
|
ThinkingModel: envOr("HYPERGUILD_THINKING_MODEL", "iguana/gemma4-26b"),
|
||||||
}
|
}
|
||||||
|
|
||||||
floor, err := parseFloatEnv("HYPERGUILD_ROUTE_LOCAL_FLOOR", 0.90)
|
floor, err := parseFloatEnv("HYPERGUILD_ROUTE_LOCAL_FLOOR", 0.90)
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ import (
|
|||||||
func TestLoadRoutingDefaults(t *testing.T) {
|
func TestLoadRoutingDefaults(t *testing.T) {
|
||||||
for _, k := range []string{
|
for _, k := range []string{
|
||||||
"ROUTING_PORT", "ROUTING_MCP_TOKEN", "LITELLM_BASE_URL", "LITELLM_API_KEY",
|
"ROUTING_PORT", "ROUTING_MCP_TOKEN", "LITELLM_BASE_URL", "LITELLM_API_KEY",
|
||||||
"BRAIN_URL", "HYPERGUILD_LOCAL_MODEL", "HYPERGUILD_CLAUDE_MODEL",
|
"BRAIN_URL", "HYPERGUILD_FAST_MODEL", "HYPERGUILD_THINKING_MODEL",
|
||||||
"HYPERGUILD_ROUTE_LOCAL_FLOOR", "HYPERGUILD_ROUTE_LOCAL_CEIL",
|
"HYPERGUILD_ROUTE_LOCAL_FLOOR", "HYPERGUILD_ROUTE_LOCAL_CEIL",
|
||||||
"HYPERGUILD_PASS_RATE_TTL_SECONDS",
|
"HYPERGUILD_PASS_RATE_TTL_SECONDS",
|
||||||
} {
|
} {
|
||||||
@@ -24,8 +24,8 @@ func TestLoadRoutingDefaults(t *testing.T) {
|
|||||||
assert.Equal(t, "", cfg.MCPAuthToken)
|
assert.Equal(t, "", cfg.MCPAuthToken)
|
||||||
assert.Equal(t, "http://piguard:4000", cfg.LiteLLMBaseURL)
|
assert.Equal(t, "http://piguard:4000", cfg.LiteLLMBaseURL)
|
||||||
assert.Equal(t, "http://ingestion.supervisor:3300", cfg.BrainURL)
|
assert.Equal(t, "http://ingestion.supervisor:3300", cfg.BrainURL)
|
||||||
assert.Equal(t, "qwen35", cfg.LocalModel)
|
assert.Equal(t, "koala/qwen35-9b-fast", cfg.FastModel)
|
||||||
assert.Equal(t, "claude-sonnet-4-6", cfg.ClaudeModel)
|
assert.Equal(t, "iguana/gemma4-26b", cfg.ThinkingModel)
|
||||||
assert.InDelta(t, 0.90, cfg.RouteLocalFloor, 1e-9)
|
assert.InDelta(t, 0.90, cfg.RouteLocalFloor, 1e-9)
|
||||||
assert.InDelta(t, 0.70, cfg.RouteLocalCeil, 1e-9)
|
assert.InDelta(t, 0.70, cfg.RouteLocalCeil, 1e-9)
|
||||||
assert.Equal(t, 60, cfg.PassRateTTLSeconds)
|
assert.Equal(t, 60, cfg.PassRateTTLSeconds)
|
||||||
@@ -38,8 +38,8 @@ func TestLoadRoutingFromEnv(t *testing.T) {
|
|||||||
t.Setenv("LITELLM_BASE_URL", "http://localhost:4000")
|
t.Setenv("LITELLM_BASE_URL", "http://localhost:4000")
|
||||||
t.Setenv("LITELLM_API_KEY", "lk")
|
t.Setenv("LITELLM_API_KEY", "lk")
|
||||||
t.Setenv("BRAIN_URL", "http://localhost:3300")
|
t.Setenv("BRAIN_URL", "http://localhost:3300")
|
||||||
t.Setenv("HYPERGUILD_LOCAL_MODEL", "qwen2-7b")
|
t.Setenv("HYPERGUILD_FAST_MODEL", "koala/phi4-14b")
|
||||||
t.Setenv("HYPERGUILD_CLAUDE_MODEL", "claude-opus-4-7")
|
t.Setenv("HYPERGUILD_THINKING_MODEL", "iguana/qwen3-14b-think")
|
||||||
t.Setenv("HYPERGUILD_ROUTE_LOCAL_FLOOR", "0.85")
|
t.Setenv("HYPERGUILD_ROUTE_LOCAL_FLOOR", "0.85")
|
||||||
t.Setenv("HYPERGUILD_ROUTE_LOCAL_CEIL", "0.65")
|
t.Setenv("HYPERGUILD_ROUTE_LOCAL_CEIL", "0.65")
|
||||||
t.Setenv("HYPERGUILD_PASS_RATE_TTL_SECONDS", "30")
|
t.Setenv("HYPERGUILD_PASS_RATE_TTL_SECONDS", "30")
|
||||||
@@ -51,8 +51,8 @@ func TestLoadRoutingFromEnv(t *testing.T) {
|
|||||||
assert.Equal(t, "http://localhost:4000", cfg.LiteLLMBaseURL)
|
assert.Equal(t, "http://localhost:4000", cfg.LiteLLMBaseURL)
|
||||||
assert.Equal(t, "lk", cfg.LiteLLMAPIKey)
|
assert.Equal(t, "lk", cfg.LiteLLMAPIKey)
|
||||||
assert.Equal(t, "http://localhost:3300", cfg.BrainURL)
|
assert.Equal(t, "http://localhost:3300", cfg.BrainURL)
|
||||||
assert.Equal(t, "qwen2-7b", cfg.LocalModel)
|
assert.Equal(t, "koala/phi4-14b", cfg.FastModel)
|
||||||
assert.Equal(t, "claude-opus-4-7", cfg.ClaudeModel)
|
assert.Equal(t, "iguana/qwen3-14b-think", cfg.ThinkingModel)
|
||||||
assert.InDelta(t, 0.85, cfg.RouteLocalFloor, 1e-9)
|
assert.InDelta(t, 0.85, cfg.RouteLocalFloor, 1e-9)
|
||||||
assert.InDelta(t, 0.65, cfg.RouteLocalCeil, 1e-9)
|
assert.InDelta(t, 0.65, cfg.RouteLocalCeil, 1e-9)
|
||||||
assert.Equal(t, 30, cfg.PassRateTTLSeconds)
|
assert.Equal(t, 30, cfg.PassRateTTLSeconds)
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ import (
|
|||||||
type LogEntry struct {
|
type LogEntry struct {
|
||||||
SessionID string
|
SessionID string
|
||||||
Skill string // the original skill the call routed (e.g., "review")
|
Skill string // the original skill the call routed (e.g., "review")
|
||||||
Decision string // "local" or "claude" or "claude_fallback"
|
Decision string // "local" or "thinking" or "thinking_fallback"
|
||||||
Message string // free-form, e.g. "model=qwen35, pass_rate=0.94"
|
Message string // free-form, e.g. "model=qwen35, pass_rate=0.94"
|
||||||
ProjectRoot string
|
ProjectRoot string
|
||||||
DurationMs int64
|
DurationMs int64
|
||||||
|
|||||||
@@ -24,8 +24,8 @@ type Router struct {
|
|||||||
Fetcher *Fetcher
|
Fetcher *Fetcher
|
||||||
Logger *Logger
|
Logger *Logger
|
||||||
Policy Policy
|
Policy Policy
|
||||||
LocalModel string
|
FastModel string
|
||||||
ClaudeModel string
|
ThinkingModel string
|
||||||
Complete CompleteFunc
|
Complete CompleteFunc
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -40,9 +40,9 @@ func (r *Router) Run(ctx context.Context, in RunInput) (string, int64, error) {
|
|||||||
hash := CanonicalHash(in.System, in.User)
|
hash := CanonicalHash(in.System, in.User)
|
||||||
decision := r.Policy.Decide(pr, hash)
|
decision := r.Policy.Decide(pr, hash)
|
||||||
|
|
||||||
model := r.ClaudeModel
|
model := r.ThinkingModel
|
||||||
if decision == DecideLocal {
|
if decision == DecideLocal {
|
||||||
model = r.LocalModel
|
model = r.FastModel
|
||||||
}
|
}
|
||||||
|
|
||||||
out, ms, err := r.Complete(ctx, model, in.System, in.User)
|
out, ms, err := r.Complete(ctx, model, in.System, in.User)
|
||||||
@@ -59,13 +59,13 @@ func (r *Router) Run(ctx context.Context, in RunInput) (string, int64, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if err != nil && decision == DecideLocal {
|
if err != nil && decision == DecideLocal {
|
||||||
slog.Warn("router: local failed, falling open to claude", "skill", in.Skill, "err", err)
|
slog.Warn("router: fast failed, falling open to thinking model", "skill", in.Skill, "err", err)
|
||||||
out, ms, err = r.Complete(ctx, r.ClaudeModel, in.System, in.User)
|
out, ms, err = r.Complete(ctx, r.ThinkingModel, in.System, in.User)
|
||||||
if lerr := r.Logger.LogDecision(ctx, LogEntry{
|
if lerr := r.Logger.LogDecision(ctx, LogEntry{
|
||||||
SessionID: in.SessionID,
|
SessionID: in.SessionID,
|
||||||
Skill: in.Skill,
|
Skill: in.Skill,
|
||||||
Decision: "claude_fallback",
|
Decision: "thinking_fallback",
|
||||||
Message: fmt.Sprintf("model=%s, after-local-error", r.ClaudeModel),
|
Message: fmt.Sprintf("model=%s, after-fast-error", r.ThinkingModel),
|
||||||
ProjectRoot: in.ProjectRoot,
|
ProjectRoot: in.ProjectRoot,
|
||||||
DurationMs: ms,
|
DurationMs: ms,
|
||||||
Failed: err != nil,
|
Failed: err != nil,
|
||||||
|
|||||||
@@ -49,12 +49,12 @@ func newRouter(t *testing.T, llm *fakeLLM, passRate float64) (*routing.Router, *
|
|||||||
t.Cleanup(brain.Close)
|
t.Cleanup(brain.Close)
|
||||||
|
|
||||||
r := &routing.Router{
|
r := &routing.Router{
|
||||||
Fetcher: routing.NewFetcher(brain.URL, "7d", time.Minute),
|
Fetcher: routing.NewFetcher(brain.URL, "7d", time.Minute),
|
||||||
Logger: routing.NewLogger(brain.URL),
|
Logger: routing.NewLogger(brain.URL),
|
||||||
Policy: routing.Policy{Floor: 0.9, Ceil: 0.7},
|
Policy: routing.Policy{Floor: 0.9, Ceil: 0.7},
|
||||||
LocalModel: "qwen35",
|
FastModel: "koala/qwen35-9b-fast",
|
||||||
ClaudeModel: "claude-sonnet-4-6",
|
ThinkingModel: "iguana/gemma4-26b",
|
||||||
Complete: llm.Complete,
|
Complete: llm.Complete,
|
||||||
}
|
}
|
||||||
return r, brain, brain
|
return r, brain, brain
|
||||||
}
|
}
|
||||||
@@ -72,10 +72,10 @@ func TestRouterRoutesLocalAtHighPassRate(t *testing.T) {
|
|||||||
llm.mu.Lock()
|
llm.mu.Lock()
|
||||||
defer llm.mu.Unlock()
|
defer llm.mu.Unlock()
|
||||||
require.Len(t, llm.calls, 1)
|
require.Len(t, llm.calls, 1)
|
||||||
assert.Equal(t, "qwen35", llm.calls[0].Model)
|
assert.Equal(t, "koala/qwen35-9b-fast", llm.calls[0].Model)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestRouterRoutesClaudeAtLowPassRate(t *testing.T) {
|
func TestRouterRoutesThinkingAtLowPassRate(t *testing.T) {
|
||||||
llm := &fakeLLM{resp: "ok"}
|
llm := &fakeLLM{resp: "ok"}
|
||||||
r, _, _ := newRouter(t, llm, 0.3)
|
r, _, _ := newRouter(t, llm, 0.3)
|
||||||
|
|
||||||
@@ -87,12 +87,12 @@ func TestRouterRoutesClaudeAtLowPassRate(t *testing.T) {
|
|||||||
llm.mu.Lock()
|
llm.mu.Lock()
|
||||||
defer llm.mu.Unlock()
|
defer llm.mu.Unlock()
|
||||||
require.Len(t, llm.calls, 1)
|
require.Len(t, llm.calls, 1)
|
||||||
assert.Equal(t, "claude-sonnet-4-6", llm.calls[0].Model)
|
assert.Equal(t, "iguana/gemma4-26b", llm.calls[0].Model)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestRouterFailsOpenLocalErrorToClaude(t *testing.T) {
|
func TestRouterFailsOpenFastErrorToThinking(t *testing.T) {
|
||||||
llm := &fakeLLM{resp: "ok-after-fallback", err: errors.New("local boom"), errOn: "qwen35"}
|
llm := &fakeLLM{resp: "ok-after-fallback", err: errors.New("fast boom"), errOn: "koala/qwen35-9b-fast"}
|
||||||
r, _, _ := newRouter(t, llm, 0.95) // would route local
|
r, _, _ := newRouter(t, llm, 0.95) // would route fast
|
||||||
|
|
||||||
out, _, err := r.Run(context.Background(), routing.RunInput{
|
out, _, err := r.Run(context.Background(), routing.RunInput{
|
||||||
Skill: "review", System: "sys", User: "user", SessionID: "s3",
|
Skill: "review", System: "sys", User: "user", SessionID: "s3",
|
||||||
@@ -103,12 +103,12 @@ func TestRouterFailsOpenLocalErrorToClaude(t *testing.T) {
|
|||||||
llm.mu.Lock()
|
llm.mu.Lock()
|
||||||
defer llm.mu.Unlock()
|
defer llm.mu.Unlock()
|
||||||
require.Len(t, llm.calls, 2)
|
require.Len(t, llm.calls, 2)
|
||||||
assert.Equal(t, "qwen35", llm.calls[0].Model)
|
assert.Equal(t, "koala/qwen35-9b-fast", llm.calls[0].Model)
|
||||||
assert.Equal(t, "claude-sonnet-4-6", llm.calls[1].Model)
|
assert.Equal(t, "iguana/gemma4-26b", llm.calls[1].Model)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestRouterDefaultsToLocalWhenBrainUnreachable(t *testing.T) {
|
func TestRouterDefaultsToFastWhenBrainUnreachable(t *testing.T) {
|
||||||
// Brain returns 500 → fetcher errors → router treats pass rate as nil → local.
|
// Brain returns 500 → fetcher errors → router treats pass rate as nil → fast.
|
||||||
brain := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
|
brain := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
|
||||||
http.Error(w, "down", http.StatusInternalServerError)
|
http.Error(w, "down", http.StatusInternalServerError)
|
||||||
}))
|
}))
|
||||||
@@ -116,12 +116,12 @@ func TestRouterDefaultsToLocalWhenBrainUnreachable(t *testing.T) {
|
|||||||
|
|
||||||
llm := &fakeLLM{resp: "ok"}
|
llm := &fakeLLM{resp: "ok"}
|
||||||
r := &routing.Router{
|
r := &routing.Router{
|
||||||
Fetcher: routing.NewFetcher(brain.URL, "7d", time.Minute),
|
Fetcher: routing.NewFetcher(brain.URL, "7d", time.Minute),
|
||||||
Logger: routing.NewLogger(brain.URL),
|
Logger: routing.NewLogger(brain.URL),
|
||||||
Policy: routing.Policy{Floor: 0.9, Ceil: 0.7},
|
Policy: routing.Policy{Floor: 0.9, Ceil: 0.7},
|
||||||
LocalModel: "qwen35",
|
FastModel: "koala/qwen35-9b-fast",
|
||||||
ClaudeModel: "claude-sonnet-4-6",
|
ThinkingModel: "iguana/gemma4-26b",
|
||||||
Complete: llm.Complete,
|
Complete: llm.Complete,
|
||||||
}
|
}
|
||||||
|
|
||||||
_, _, err := r.Run(context.Background(), routing.RunInput{
|
_, _, err := r.Run(context.Background(), routing.RunInput{
|
||||||
@@ -132,5 +132,5 @@ func TestRouterDefaultsToLocalWhenBrainUnreachable(t *testing.T) {
|
|||||||
llm.mu.Lock()
|
llm.mu.Lock()
|
||||||
defer llm.mu.Unlock()
|
defer llm.mu.Unlock()
|
||||||
require.Len(t, llm.calls, 1)
|
require.Len(t, llm.calls, 1)
|
||||||
assert.Equal(t, "qwen35", llm.calls[0].Model)
|
assert.Equal(t, "koala/qwen35-9b-fast", llm.calls[0].Model)
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user