refactor(routing): rename local/claude to fast/thinking model pair
The routing decision is about reasoning capacity, not cost or provider. Fast model (koala/qwen35-9b-fast) handles high-pass-rate calls; thinking model (iguana/gemma4-26b) handles low-pass-rate calls. Removes the implicit Anthropic dependency from the routing pod — both models go through LiteLLM. Renames: HYPERGUILD_LOCAL_MODEL → HYPERGUILD_FAST_MODEL, HYPERGUILD_CLAUDE_MODEL → HYPERGUILD_THINKING_MODEL, Router.LocalModel → FastModel, Router.ClaudeModel → ThinkingModel, log decision "claude_fallback" → "thinking_fallback". Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -48,12 +48,12 @@ func main() {
|
||||
llm := iexec.NewLiteLLM(cfg.LiteLLMBaseURL, cfg.LiteLLMAPIKey, 0)
|
||||
|
||||
router := &routing.Router{
|
||||
- Fetcher: routing.NewFetcher(cfg.BrainURL, "7d", time.Duration(cfg.PassRateTTLSeconds)*time.Second),
|
||||
- Logger: routing.NewLogger(cfg.BrainURL),
|
||||
- Policy: routing.Policy{Floor: cfg.RouteLocalFloor, Ceil: cfg.RouteLocalCeil},
|
||||
- LocalModel: cfg.LocalModel,
|
||||
- ClaudeModel: cfg.ClaudeModel,
|
||||
- Complete: llm.Complete,
|
||||
+ Fetcher: routing.NewFetcher(cfg.BrainURL, "7d", time.Duration(cfg.PassRateTTLSeconds)*time.Second),
|
||||
+ Logger: routing.NewLogger(cfg.BrainURL),
|
||||
+ Policy: routing.Policy{Floor: cfg.RouteLocalFloor, Ceil: cfg.RouteLocalCeil},
|
||||
+ FastModel: cfg.FastModel,
|
||||
|
||||
+ ThinkingModel: cfg.ThinkingModel,
|
||||
+ Complete: llm.Complete,
|
||||
}
|
||||
|
||||
// Skill packages call CompleteFunc(ctx, model, system, user) — no session_id
|
||||
@@ -78,23 +78,23 @@ func main() {
|
||||
reg := registry.New()
|
||||
reg.Register(review.New(review.Config{
|
||||
SkillPrompt: mustRead("review.md"),
|
||||
- DefaultModel: cfg.LocalModel,
|
||||
+ DefaultModel: cfg.FastModel,
|
||||
CompleteFunc: review.CompleteFunc(wrap("review")),
|
||||
}))
|
||||
reg.Register(debug.New(debug.Config{
|
||||
SkillPrompt: mustRead("debug.md"),
|
||||
- DefaultModel: cfg.LocalModel,
|
||||
+ DefaultModel: cfg.FastModel,
|
||||
CompleteFunc: debug.CompleteFunc(wrap("debug")),
|
||||
}))
|
||||
reg.Register(retrospective.New(retrospective.Config{
|
||||
SkillPrompt: mustRead("retrospective.md"),
|
||||
- DefaultModel: cfg.LocalModel,
|
||||
+ DefaultModel: cfg.FastModel,
|
||||
CompleteFunc: retrospective.CompleteFunc(wrap("retrospective")),
|
||||
}))
|
||||
reg.Register(trainer.New(trainer.Config{
|
||||
ReaderPrompt: mustRead("trainer-reader.md"),
|
||||
WriterPrompt: mustRead("trainer-writer.md"),
|
||||
- DefaultModel: cfg.LocalModel,
|
||||
+ DefaultModel: cfg.FastModel,
|
||||
CompleteFunc: trainer.CompleteFunc(wrap("trainer")),
|
||||
}))
|
||||
|
||||
@@ -107,7 +107,7 @@ func main() {
|
||||
|
||||
addr := ":" + cfg.Port
|
||||
logger.Info("routing pod starting", "addr", addr,
|
||||
- "local", cfg.LocalModel, "claude", cfg.ClaudeModel,
|
||||
+ "fast", cfg.FastModel, "thinking", cfg.ThinkingModel,
|
||||
"floor", cfg.RouteLocalFloor, "ceil", cfg.RouteLocalCeil)
|
||||
if err := http.ListenAndServe(addr, mux); err != nil { //nolint:gosec
|
||||
logger.Error("server stopped", "err", err)
|
||||
|
||||
Reference in New Issue
Block a user