Files
hyperguild/internal/config/routing.go
Mathias Bergqvist 5b207425ed
All checks were successful
CI / Lint / Test / Vet (pull_request) Successful in 10s
CI / Mirror to GitHub (pull_request) Has been skipped
refactor(routing): rename local/claude to fast/thinking model pair
The routing decision is about reasoning capacity, not cost or provider.
Fast model (koala/qwen35-9b-fast) handles high-pass-rate calls; thinking
model (iguana/gemma4-26b) handles low-pass-rate calls. Removes the
implicit Anthropic dependency from the routing pod — both models go
through LiteLLM.

Renames: HYPERGUILD_LOCAL_MODEL → HYPERGUILD_FAST_MODEL,
HYPERGUILD_CLAUDE_MODEL → HYPERGUILD_THINKING_MODEL,
Router.LocalModel → FastModel, Router.ClaudeModel → ThinkingModel,
log decision "claude_fallback" → "thinking_fallback".

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-08 16:39:42 +02:00

85 lines
2.8 KiB
Go

package config
import (
"fmt"
"os"
"strconv"
)
// RoutingConfig holds the runtime configuration for the routing pod.
// Separate from Config because the routing pod's surface differs from the supervisor's.
type RoutingConfig struct {
Port string // ROUTING_PORT, default 3210
MCPAuthToken string // ROUTING_MCP_TOKEN, optional bearer token
LiteLLMBaseURL string // LITELLM_BASE_URL, default http://piguard:4000
LiteLLMAPIKey string // LITELLM_API_KEY
BrainURL string // BRAIN_URL, default http://ingestion.supervisor:3300
FastModel string // HYPERGUILD_FAST_MODEL, default koala/qwen35-9b-fast
ThinkingModel string // HYPERGUILD_THINKING_MODEL, default iguana/gemma4-26b
// RouteLocalFloor and RouteLocalCeil intentionally invert the usual
// floor < ceil mathematical convention: Floor (default 0.90) is the
// UPPER boundary — at/above it, always route local; Ceil (default 0.70)
// is the LOWER boundary — below it, always route Claude. The band in
// between is the 50/50 sample zone. The naming follows the spec's policy
// vocabulary; see internal/routing/policy.go for the consumer.
RouteLocalFloor float64 // HYPERGUILD_ROUTE_LOCAL_FLOOR, default 0.90
RouteLocalCeil float64 // HYPERGUILD_ROUTE_LOCAL_CEIL, default 0.70
PassRateTTLSeconds int // HYPERGUILD_PASS_RATE_TTL_SECONDS, default 60
}
func LoadRouting() (RoutingConfig, error) {
cfg := RoutingConfig{
Port: envOr("ROUTING_PORT", "3210"),
MCPAuthToken: os.Getenv("ROUTING_MCP_TOKEN"),
LiteLLMBaseURL: envOr("LITELLM_BASE_URL", "http://piguard:4000"),
LiteLLMAPIKey: os.Getenv("LITELLM_API_KEY"),
BrainURL: envOr("BRAIN_URL", "http://ingestion.supervisor:3300"),
FastModel: envOr("HYPERGUILD_FAST_MODEL", "koala/qwen35-9b-fast"),
ThinkingModel: envOr("HYPERGUILD_THINKING_MODEL", "iguana/gemma4-26b"),
}
floor, err := parseFloatEnv("HYPERGUILD_ROUTE_LOCAL_FLOOR", 0.90)
if err != nil {
return RoutingConfig{}, err
}
cfg.RouteLocalFloor = floor
ceil, err := parseFloatEnv("HYPERGUILD_ROUTE_LOCAL_CEIL", 0.70)
if err != nil {
return RoutingConfig{}, err
}
cfg.RouteLocalCeil = ceil
ttl, err := parseIntEnv("HYPERGUILD_PASS_RATE_TTL_SECONDS", 60)
if err != nil {
return RoutingConfig{}, err
}
cfg.PassRateTTLSeconds = ttl
return cfg, nil
}
func parseFloatEnv(key string, def float64) (float64, error) {
v := os.Getenv(key)
if v == "" {
return def, nil
}
f, err := strconv.ParseFloat(v, 64)
if err != nil {
return 0, fmt.Errorf("config: %s: %w", key, err)
}
return f, nil
}
func parseIntEnv(key string, def int) (int, error) {
v := os.Getenv(key)
if v == "" {
return def, nil
}
n, err := strconv.Atoi(v)
if err != nil {
return 0, fmt.Errorf("config: %s: %w", key, err)
}
return n, nil
}