Files
hyperguild/internal/routing/router.go
Mathias Bergqvist 5b207425ed
All checks were successful
CI / Lint / Test / Vet (pull_request) Successful in 10s
CI / Mirror to GitHub (pull_request) Has been skipped
refactor(routing): rename local/claude to fast/thinking model pair
The routing decision is about reasoning capacity, not cost or provider.
Fast model (koala/qwen35-9b-fast) handles high-pass-rate calls; thinking
model (iguana/gemma4-26b) handles low-pass-rate calls. Removes the
implicit Anthropic dependency from the routing pod — both models go
through LiteLLM.

Renames: HYPERGUILD_LOCAL_MODEL → HYPERGUILD_FAST_MODEL,
HYPERGUILD_CLAUDE_MODEL → HYPERGUILD_THINKING_MODEL,
Router.LocalModel → FastModel, Router.ClaudeModel → ThinkingModel,
log decision "claude_fallback" → "thinking_fallback".

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-08 16:39:42 +02:00

85 lines
2.5 KiB
Go

package routing
import (
"context"
"fmt"
"log/slog"
)
// CompleteFunc matches the signature used by every skill package's Config.
//
// It receives the model name and the system and user strings for one call and
// returns the output string, an int64 that Router.Run records as the call's
// duration in milliseconds (LogEntry.DurationMs), and an error.
type CompleteFunc func(ctx context.Context, model, system, user string) (string, int64, error)
// RunInput bundles everything Router.Run needs to know about a single skill
// call.
type RunInput struct {
	// Skill names the skill being run; it keys the pass-rate lookup and is
	// attached to every log entry and warning.
	Skill string

	// System is the system string forwarded to the model; together with User
	// it feeds the canonical hash handed to the policy.
	System string

	// User is the user string forwarded to the model.
	User string

	// SessionID is copied verbatim into each logged decision.
	SessionID string

	// ProjectRoot is copied verbatim into each logged decision.
	ProjectRoot string
}
// Router composes a pass-rate fetcher, a decision policy, a session logger,
// and a LiteLLM client. Skill packages receive Router.Run as their completion
// entry point.
//
// NOTE(review): Run takes a RunInput rather than (model, system, user), so it
// is not itself assignable to CompleteFunc; presumably callers adapt it —
// confirm against the wiring code.
type Router struct {
	// Fetcher supplies the current pass rate for a skill (Run calls Get).
	Fetcher *Fetcher

	// Logger records every routing decision (Run calls LogDecision).
	Logger *Logger

	// Policy turns the pass rate and the canonical hash of the call's
	// system/user strings into a decision.
	Policy Policy

	// FastModel is the model name passed to Complete when the policy returns
	// DecideLocal.
	FastModel string

	// ThinkingModel is the model name used for every other decision, and for
	// the single retry after a FastModel error.
	ThinkingModel string

	// Complete performs the actual model call.
	Complete CompleteFunc
}
// Run executes one skill call: it fetches the skill's pass rate, asks the
// policy to choose between the fast and the thinking model, calls Complete
// with the chosen model, and logs the decision.
//
// If the pass rate cannot be fetched, a warning is emitted and the policy is
// consulted with a nil pass rate. If the fast model returns an error, Run
// falls open by retrying once on the thinking model and logs that attempt
// with the decision "thinking_fallback". The retry is skipped when ctx is
// already cancelled or past its deadline, since a second call on the same
// context could not succeed. Failures to write a log entry are only warned
// about; they never fail the call.
//
// Run returns the output, duration (as reported by Complete) and error of the
// last Complete call that was made.
func (r *Router) Run(ctx context.Context, in RunInput) (string, int64, error) {
	pr, ferr := r.Fetcher.Get(ctx, in.Skill)
	if ferr != nil {
		// NOTE(review): the message assumes Policy.Decide maps a nil pass rate
		// to the fast model — confirm against the policy implementation.
		slog.Warn("router: pass-rate unreachable, defaulting to fast model", "skill", in.Skill, "err", ferr)
		// Drop any partial value so the policy sees "no data".
		pr = nil
	}

	decision := r.Policy.Decide(pr, CanonicalHash(in.System, in.User))
	model := r.ThinkingModel
	if decision == DecideLocal {
		model = r.FastModel
	}

	// logAttempt records one Complete attempt. Logging is best-effort: an
	// error from the logger is downgraded to a warning.
	logAttempt := func(label, message string, durationMs int64, failed bool) {
		entry := LogEntry{
			SessionID:   in.SessionID,
			Skill:       in.Skill,
			Decision:    label,
			Message:     message,
			ProjectRoot: in.ProjectRoot,
			DurationMs:  durationMs,
			Failed:      failed,
		}
		if lerr := r.Logger.LogDecision(ctx, entry); lerr != nil {
			slog.Warn("router: log decision failed", "skill", in.Skill, "err", lerr)
		}
	}

	out, ms, err := r.Complete(ctx, model, in.System, in.User)
	logAttempt(
		decision.String(),
		fmt.Sprintf("model=%s, pass_rate=%s", model, formatPassRate(pr)),
		ms,
		err != nil,
	)

	// Only a failed fast-model call is eligible for the fall-open retry.
	if err == nil || decision != DecideLocal {
		return out, ms, err
	}

	// A done context would make the retry fail immediately and would record a
	// misleading fallback failure, so surface the original error instead.
	if ctxErr := ctx.Err(); ctxErr != nil {
		slog.Warn("router: fast failed and context is done, skipping thinking fallback", "skill", in.Skill, "err", err, "ctx_err", ctxErr)
		return out, ms, err
	}

	slog.Warn("router: fast failed, falling open to thinking model", "skill", in.Skill, "err", err)
	out, ms, err = r.Complete(ctx, r.ThinkingModel, in.System, in.User)
	logAttempt(
		"thinking_fallback",
		fmt.Sprintf("model=%s, after-fast-error", r.ThinkingModel),
		ms,
		err != nil,
	)
	return out, ms, err
}
// formatPassRate renders an optional pass rate for log messages: "null" when
// no value is available, otherwise the value with two decimal places.
func formatPassRate(pr *float64) string {
	rendered := "null"
	if pr != nil {
		rendered = fmt.Sprintf("%.2f", *pr)
	}
	return rendered
}