feat(ingestion): expose Prometheus /metrics for brain query latency
Closes infra#50.
Adds an internal/metrics package with a hand-rolled Prometheus
exposition layer (stdlib + sync/atomic only — no new dep) and wraps the
HTTP mux with a timing middleware. Every request emits one observation
on the `brain_query_duration_seconds` histogram labeled by
`path` (request Pattern, low cardinality) and `status` (2xx/3xx/4xx/5xx).
Dependency choice: hand-rolled rather than github.com/prometheus/client_golang
because the surface needed is small (one histogram + bucket constants)
and the repo CLAUDE.md keeps deps stdlib + jwx + testify only. ~150 LOC
of code + tests is cheaper than the chart of transitive prometheus deps.
Endpoints:
- GET /metrics — OpenMetrics text exposition, no auth (cluster-internal)
Wire format pinned by tests in internal/metrics/metrics_test.go. The
ServiceMonitor that drives the kube-prometheus-stack scrape lives in
infra/k3s/apps/supervisor/ (separate commit on mathias/infra).
After this image deploys, the canary alert from
docs/superpowers/specs/2026-05-homelab-architecture-review.md becomes
wireable:
histogram_quantile(0.95,
sum(rate(brain_query_duration_seconds_bucket[5m])) by (le))
> 1.5 * histogram_quantile(0.95,
sum(rate(brain_query_duration_seconds_bucket[5m] offset 7d)) by (le))
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -17,6 +17,7 @@ import (
|
||||
"github.com/mathiasbq/hyperguild/ingestion/internal/llm"
|
||||
"github.com/mathiasbq/hyperguild/ingestion/internal/mcp"
|
||||
"github.com/mathiasbq/hyperguild/ingestion/internal/embed"
|
||||
"github.com/mathiasbq/hyperguild/ingestion/internal/metrics"
|
||||
"github.com/mathiasbq/hyperguild/ingestion/internal/oauth"
|
||||
"github.com/mathiasbq/hyperguild/ingestion/internal/pipeline"
|
||||
"github.com/mathiasbq/hyperguild/ingestion/internal/reranker"
|
||||
@@ -235,6 +236,15 @@ func main() {
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
// /metrics — unauthenticated Prometheus endpoint. kube-prometheus-stack
|
||||
// scrapes it via the ServiceMonitor in k3s/apps/supervisor/. The metrics
|
||||
// middleware below wraps every other registered handler so it observes
|
||||
// real request latency. /metrics itself is excluded from its own
|
||||
// observation by registering it on the outer mux (post-wrap).
|
||||
reg := metrics.New()
|
||||
mux.HandleFunc("GET /metrics", reg.Handler())
|
||||
logger.Info("metrics endpoint registered", "path", "/metrics")
|
||||
|
||||
addr := ":" + port
|
||||
watchIntervalLog := "disabled"
|
||||
if watchInterval > 0 {
|
||||
@@ -249,7 +259,7 @@ func main() {
|
||||
"watch_interval", watchIntervalLog,
|
||||
"mcp_enabled", true,
|
||||
)
|
||||
if err := http.ListenAndServe(addr, mux); err != nil {
|
||||
if err := http.ListenAndServe(addr, reg.Middleware(mux)); err != nil {
|
||||
logger.Error("server stopped", "err", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user