feat(mcp): wire brain_context tool

Returns top-N relevant brain entries for a project context. Combines BM25 hits on project name with 2-hop graph expansion via Track A's graphstore (when BRAIN_GRAPH_ENABLED). Closes hyperguild#28. Notes on implementation choices that deviate slightly from the spec: - Excerpt length: 200 chars per spec (vs the 300 used by search.Result). truncateExcerpt clamps the already-stripped BM25 excerpt; graph-only neighbours load their excerpt from disk via a private readExcerpt helper (search.hydrate is unexported). - Graph scoring: 0.6 / max(1, distance) per neighbour, so distance-1 contributes 0.6 and distance-2 contributes 0.3. BM25 hits decay linearly from 3.0 (rank-0) to 1.0 (rank-2), giving BM25 hits a natural ceiling above pure-graph hits while still letting a doc surfaced via both edge types outrank a BM25-only one. - Test placement: package mcp (internal) rather than mcp_test, because graphReader is unexported and WithGraph only accepts *PGStore; an internal test can install a dual-interface fake directly on s.graph without spinning up postgres. Bump-Type: minor Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-25 18:53:14 +02:00
parent 2b7bbe38c7
commit 2726896079
5 changed files with 433 additions and 2 deletions
--- a/ingestion/internal/mcp/tools_context.go
+++ b/ingestion/internal/mcp/tools_context.go
@@ -0,0 +1,202 @@
+package mcp
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"os"
+	"path/filepath"
+	"sort"
+	"strings"
+
+	"github.com/mathiasbq/hyperguild/ingestion/internal/search"
+)
+
+// brainContextArgs is the input shape of brain_context. project_root is
+// required; recent_files biases ranking when provided; limit caps the
+// returned set (default 10).
+type brainContextArgs struct {
+	// ProjectRoot is mandatory: brainContext rejects an empty value and uses
+	// the basename of this path as the search query.
+	ProjectRoot string   `json:"project_root"`
+	// RecentFiles is optional. Entries whose doc_path basename equals the
+	// basename of one of these files receive a score boost.
+	RecentFiles []string `json:"recent_files,omitempty"`
+	// Limit is the maximum number of entries returned; values <= 0 fall
+	// back to 10.
+	Limit       int      `json:"limit,omitempty"`
+}
+
+// contextEntry is one returned brain entry: the slug, its title, the
+// document path, a frontmatter-stripped excerpt, the source that first
+// surfaced it (bm25|graph), and a final score used for ranking before
+// truncation to Limit.
+type contextEntry struct {
+	// Slug identifies the entry; results are deduplicated on it.
+	Slug     string  `json:"slug"`
+	// Title is copied from the search hit or the graph node.
+	Title    string  `json:"title"`
+	// DocPath is the document path as reported by the search hit or the
+	// graph node; its basename is what recent_files is matched against.
+	DocPath  string  `json:"doc_path"`
+	// Excerpt is a short body snippet; it may be empty when the document
+	// could not be read.
+	Excerpt  string  `json:"excerpt"`
+	// EdgeType is "bm25" for search hits and "graph" for entries that were
+	// only reached through graph expansion.
+	EdgeType string  `json:"edge_type"`
+	// Score is the accumulated ranking score (higher ranks first).
+	Score    float64 `json:"score"`
+}
+
+// brainContext returns the top-N brain entries relevant to a project context.
+//
+// Pipeline:
+//  1. The basename of project_root becomes the query for search.QueryContext;
+//     at most three hits are kept as seeds.
+//  2. Seeds are scored purely by rank: 3.0, 2.0, 1.0 for ranks 0, 1, 2. The
+//     score does not depend on how many hits came back, so a lone hit still
+//     scores 3.0.
+//  3. When s.graph implements graphReader, every seed is expanded 2 hops.
+//     Each neighbour contributes 0.6 / max(1, distance); contributions for a
+//     slug that is already present (as a seed or via an earlier seed) are
+//     added to its score. Subgraph errors are skipped on purpose — graph
+//     augmentation is best-effort.
+//  4. recent_files, when given, adds 1.0 to entries whose doc_path basename
+//     equals the basename of a recent file.
+//  5. Entries are sorted by score descending, slug ascending, and truncated
+//     to limit (default 10). Only the surviving graph-only entries have their
+//     excerpt loaded from disk.
+//
+// args is the JSON encoding of brainContextArgs. The result is the JSON
+// object {"entries": [...]}; entries is always an array, never null.
+//
+// An error is returned when args cannot be parsed, when project_root is
+// missing or has no usable basename, or when the search itself fails.
+func (s *Server) brainContext(ctx context.Context, args json.RawMessage) (json.RawMessage, error) {
+	const (
+		// bm25Seeds is the number of search hits used as graph seeds and,
+		// at the same time, the score of the best-ranked hit.
+		bm25Seeds = 3
+		// excerptLen is the maximum excerpt length in characters.
+		excerptLen = 200
+	)
+
+	var a brainContextArgs
+	if err := json.Unmarshal(args, &a); err != nil {
+		return nil, fmt.Errorf("parse args: %w", err)
+	}
+	if a.ProjectRoot == "" {
+		return nil, fmt.Errorf("project_root is required")
+	}
+	limit := a.Limit
+	if limit <= 0 {
+		limit = 10
+	}
+
+	projectName := filepath.Base(strings.TrimRight(a.ProjectRoot, "/"))
+	if projectName == "" || projectName == "." || projectName == "/" {
+		return nil, fmt.Errorf("project_root has no usable basename: %q", a.ProjectRoot)
+	}
+
+	// Seed hits on the project name; these double as graph expansion seeds.
+	bm25, err := search.QueryContext(ctx, s.brainDir, search.QueryOptions{
+		Query:    projectName,
+		Limit:    bm25Seeds,
+		Vector:   s.vector,
+		Embedder: s.embedder,
+	})
+	if err != nil {
+		return nil, fmt.Errorf("search: %w", err)
+	}
+	// Defensive: never let an over-long result push rank scores to <= 0.
+	if len(bm25) > bm25Seeds {
+		bm25 = bm25[:bm25Seeds]
+	}
+
+	// Dedup by slug while merging search hits and graph neighbours.
+	bySlug := make(map[string]*contextEntry)
+	for i, r := range bm25 {
+		slug := slugFromPath(r.Path)
+		if slug == "" {
+			continue
+		}
+		bySlug[slug] = &contextEntry{
+			Slug:     slug,
+			Title:    r.Title,
+			DocPath:  r.Path,
+			Excerpt:  truncateExcerpt(r.Excerpt, excerptLen),
+			EdgeType: "bm25",
+			// Rank-based score: 3.0 / 2.0 / 1.0 regardless of hit count.
+			Score: float64(bm25Seeds - i),
+		}
+	}
+
+	// Graph expansion: for each seed, fetch its 2-hop subgraph and merge the
+	// neighbours in with a score that decays with hop distance.
+	if reader, ok := s.graph.(graphReader); ok {
+		for _, r := range bm25 {
+			seed := slugFromPath(r.Path)
+			if seed == "" {
+				continue
+			}
+			ns, gerr := reader.Subgraph(ctx, seed, 2)
+			if gerr != nil {
+				// Best-effort: a failing seed must not fail the request.
+				continue
+			}
+			for _, n := range ns {
+				if n.Slug == "" || n.Slug == seed {
+					continue
+				}
+				// Distance 1 scores 0.6, distance 2 scores 0.3.
+				gscore := 0.6 / float64(max1(n.Distance))
+				if existing, ok := bySlug[n.Slug]; ok {
+					// Already present — either a search hit or a neighbour
+					// of an earlier seed. Accumulate the extra evidence.
+					existing.Score += gscore
+					continue
+				}
+				// The excerpt is filled in after truncation so that no file
+				// is read for an entry that gets cut by limit.
+				bySlug[n.Slug] = &contextEntry{
+					Slug:     n.Slug,
+					Title:    n.Title,
+					DocPath:  n.DocPath,
+					EdgeType: "graph",
+					Score:    gscore,
+				}
+			}
+		}
+	}
+
+	// Optional recent_files boost: +1 to entries whose doc_path basename
+	// matches any recent file basename. v1 is intentionally simple.
+	if len(a.RecentFiles) > 0 {
+		recent := make(map[string]struct{}, len(a.RecentFiles))
+		for _, f := range a.RecentFiles {
+			if f == "" {
+				// filepath.Base("") is "."; it must not act as a wildcard
+				// for entries that have no doc_path.
+				continue
+			}
+			recent[filepath.Base(f)] = struct{}{}
+		}
+		for _, e := range bySlug {
+			if e.DocPath == "" {
+				continue
+			}
+			if _, hit := recent[filepath.Base(e.DocPath)]; hit {
+				e.Score += 1.0
+			}
+		}
+	}
+
+	// Flatten and sort by score desc, slug asc as a deterministic tiebreaker
+	// (map iteration order is random).
+	entries := make([]contextEntry, 0, len(bySlug))
+	for _, e := range bySlug {
+		entries = append(entries, *e)
+	}
+	sort.SliceStable(entries, func(i, j int) bool {
+		if entries[i].Score != entries[j].Score {
+			return entries[i].Score > entries[j].Score
+		}
+		return entries[i].Slug < entries[j].Slug
+	})
+	if len(entries) > limit {
+		entries = entries[:limit]
+	}
+
+	// Hydrate excerpts only for the graph-only entries that made the cut.
+	for i := range entries {
+		if entries[i].EdgeType == "graph" {
+			entries[i].Excerpt = readExcerpt(s.brainDir, entries[i].DocPath, excerptLen)
+		}
+	}
+
+	return json.Marshal(map[string]any{"entries": entries})
+}
+
+// truncateExcerpt clamps an already-stripped excerpt to at most maxLen
+// characters (runes) without re-running the frontmatter parser. When the
+// text is cut, "…" is appended; otherwise s is returned unchanged.
+//
+// The cut is made on a rune boundary so that multi-byte UTF-8 text is never
+// split in the middle of a character. A negative maxLen is treated as 0.
+func truncateExcerpt(s string, maxLen int) string {
+	if maxLen < 0 {
+		maxLen = 0
+	}
+	// The byte length is an upper bound on the rune count, so a string this
+	// short can never exceed maxLen characters.
+	if len(s) <= maxLen {
+		return s
+	}
+	seen := 0
+	for i := range s { // i is the byte offset of each rune start
+		if seen == maxLen {
+			return s[:i] + "…"
+		}
+		seen++
+	}
+	// More bytes than maxLen, but not more characters.
+	return s
+}
+
+// readExcerpt loads the document at relPath (slash-separated, relative to
+// brainDir), strips a leading "---"-delimited frontmatter block if there is
+// one, and returns at most the first maxLen characters (runes) of the body,
+// followed by "…" when the body was cut.
+//
+// It returns "" when relPath is empty or the file cannot be read — the
+// excerpt is informational, not load-bearing for correctness.
+//
+// NOTE(review): relPath is joined onto brainDir without a containment
+// check; this presumably relies on the caller passing trusted paths —
+// confirm.
+func readExcerpt(brainDir, relPath string, maxLen int) string {
+	if relPath == "" {
+		return ""
+	}
+	full := filepath.Join(brainDir, filepath.FromSlash(relPath))
+	content, err := os.ReadFile(full)
+	if err != nil {
+		return ""
+	}
+
+	body := string(content)
+	// Only treat the document as having frontmatter when nothing but
+	// whitespace precedes the first "---". Otherwise a body that merely
+	// contains two "---" markers (e.g. Markdown horizontal rules) would lose
+	// everything before the second one.
+	parts := strings.SplitN(body, "---", 3)
+	if len(parts) == 3 && strings.TrimSpace(parts[0]) == "" {
+		body = strings.TrimSpace(parts[2])
+	}
+
+	if maxLen < 0 {
+		maxLen = 0
+	}
+	// Byte length bounds the rune count, so a short body needs no scan.
+	if len(body) <= maxLen {
+		return body
+	}
+	// Cut on a rune boundary so multi-byte characters are never split.
+	seen := 0
+	for i := range body {
+		if seen == maxLen {
+			return body[:i] + "…"
+		}
+		seen++
+	}
+	return body
+}
+
+// max1 clamps n from below at 1: any value smaller than 1 yields 1, every
+// other value is returned as is. It keeps the graph-distance divisor
+// non-zero, so a distance of 0 scores like a distance of 1 rather than
+// producing an infinity.
+func max1(n int) int {
+	if n >= 1 {
+		return n
+	}
+	return 1
+}