Closes infra#50.
Adds an internal/metrics package with a hand-rolled Prometheus
exposition layer (stdlib + sync/atomic only — no new dep) and wraps the
HTTP mux with a timing middleware. Every request emits one observation
on the `brain_query_duration_seconds` histogram labeled by
`path` (request Pattern, low cardinality) and `status` (2xx/3xx/4xx/5xx).
Dependency choice: hand-rolled rather than github.com/prometheus/client_golang
because the surface needed is small (one histogram + bucket constants)
and the repo CLAUDE.md keeps deps stdlib + jwx + testify only. ~150 LOC
of code + tests is cheaper than the chart of transitive prometheus deps.
Endpoints:
- GET /metrics — Prometheus text exposition format (Content-Type text/plain), no auth (cluster-internal)
Wire format pinned by tests in internal/metrics/metrics_test.go. The
ServiceMonitor that drives the kube-prometheus-stack scrape lives in
infra/k3s/apps/supervisor/ (separate commit on mathias/infra).
After this image deploys, the canary alert from
docs/superpowers/specs/2026-05-homelab-architecture-review.md becomes
wireable:
histogram_quantile(0.95,
sum(rate(brain_query_duration_seconds_bucket[5m])) by (le))
> 1.5 * histogram_quantile(0.95,
sum(rate(brain_query_duration_seconds_bucket[5m] offset 7d)) by (le))
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
120 lines
3.3 KiB
Go
120 lines
3.3 KiB
Go
package metrics
|
|
|
|
import (
|
|
"io"
|
|
"net/http"
|
|
"net/http/httptest"
|
|
"strings"
|
|
"testing"
|
|
"time"
|
|
)
|
|
|
|
func TestRegistry_ObserveAndExpose(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
r := New()
|
|
// Three observations on the same series; one falls into each
|
|
// representative band.
|
|
r.Observe("/query", "2xx", 4*time.Millisecond) // ≤ 5ms
|
|
r.Observe("/query", "2xx", 20*time.Millisecond) // ≤ 25ms
|
|
r.Observe("/query", "2xx", 600*time.Millisecond) // ≤ 1s
|
|
|
|
req := httptest.NewRequest(http.MethodGet, "/metrics", nil)
|
|
rec := httptest.NewRecorder()
|
|
r.Handler().ServeHTTP(rec, req)
|
|
|
|
body := rec.Body.String()
|
|
|
|
mustContain := []string{
|
|
`# TYPE brain_query_duration_seconds histogram`,
|
|
`brain_query_duration_seconds_bucket{path="/query",status="2xx",le="0.005"} 1`,
|
|
`brain_query_duration_seconds_bucket{path="/query",status="2xx",le="0.025"} 2`,
|
|
`brain_query_duration_seconds_bucket{path="/query",status="2xx",le="1.0"} 3`,
|
|
`brain_query_duration_seconds_bucket{path="/query",status="2xx",le="+Inf"} 3`,
|
|
`brain_query_duration_seconds_count{path="/query",status="2xx"} 3`,
|
|
}
|
|
for _, want := range mustContain {
|
|
if !strings.Contains(body, want) {
|
|
t.Errorf("missing line: %q\n--- body ---\n%s", want, body)
|
|
}
|
|
}
|
|
|
|
if got := rec.Header().Get("Content-Type"); !strings.HasPrefix(got, "text/plain") {
|
|
t.Errorf("content-type = %q, want text/plain prefix", got)
|
|
}
|
|
}
|
|
|
|
func TestRegistry_LabelsByStatus(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
r := New()
|
|
r.Observe("/query", "2xx", time.Millisecond)
|
|
r.Observe("/query", "5xx", time.Millisecond)
|
|
r.Observe("/write", "2xx", time.Millisecond)
|
|
|
|
rec := httptest.NewRecorder()
|
|
r.Handler().ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/metrics", nil))
|
|
body := rec.Body.String()
|
|
|
|
for _, want := range []string{
|
|
`brain_query_duration_seconds_count{path="/query",status="2xx"} 1`,
|
|
`brain_query_duration_seconds_count{path="/query",status="5xx"} 1`,
|
|
`brain_query_duration_seconds_count{path="/write",status="2xx"} 1`,
|
|
} {
|
|
if !strings.Contains(body, want) {
|
|
t.Errorf("missing %q in body:\n%s", want, body)
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestMiddleware_RecordsTiming(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
r := New()
|
|
handler := r.Middleware(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
|
|
time.Sleep(2 * time.Millisecond)
|
|
w.WriteHeader(http.StatusOK)
|
|
_, _ = io.WriteString(w, "ok")
|
|
}))
|
|
|
|
srv := httptest.NewServer(handler)
|
|
defer srv.Close()
|
|
|
|
resp, err := http.Get(srv.URL + "/query")
|
|
if err != nil {
|
|
t.Fatalf("get: %v", err)
|
|
}
|
|
_ = resp.Body.Close()
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
t.Fatalf("status %d, want 200", resp.StatusCode)
|
|
}
|
|
|
|
// Exposition should now include /query.
|
|
rec := httptest.NewRecorder()
|
|
r.Handler().ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/metrics", nil))
|
|
body := rec.Body.String()
|
|
if !strings.Contains(body, `path="/query"`) {
|
|
t.Errorf("expected /query series, got body:\n%s", body)
|
|
}
|
|
if !strings.Contains(body, `status="2xx"`) {
|
|
t.Errorf("expected 2xx status class, got body:\n%s", body)
|
|
}
|
|
}
|
|
|
|
func TestStatusRecorder_DefaultsTo200(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
r := New()
|
|
handler := r.Middleware(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
|
|
_, _ = w.Write([]byte("hello"))
|
|
}))
|
|
|
|
rec := httptest.NewRecorder()
|
|
handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/x", nil))
|
|
|
|
if rec.Code != http.StatusOK {
|
|
t.Errorf("code %d, want 200", rec.Code)
|
|
}
|
|
}
|