Returns top-N relevant brain entries for a project context. Combines BM25 hits on project name with 2-hop graph expansion via Track A's graphstore (when BRAIN_GRAPH_ENABLED). Closes hyperguild#28. Notes on implementation choices that deviate slightly from the spec: - Excerpt length: 200 chars per spec (vs the 300 used by search.Result). truncateExcerpt clamps the already-stripped BM25 excerpt; graph-only neighbours load their excerpt from disk via a private readExcerpt helper (search.hydrate is unexported). - Graph scoring: 0.6 / max(1, distance) per neighbour, so distance-1 contributes 0.6 and distance-2 contributes 0.3. BM25 hits decay linearly from 3.0 (rank-0) to 1.0 (rank-2), giving BM25 hits a natural ceiling above pure-graph hits while still letting a doc surfaced via both edge types outrank a BM25-only one. - Test placement: package mcp (internal) rather than mcp_test, because graphReader is unexported and WithGraph only accepts *PGStore; an internal test can install a dual-interface fake directly on s.graph without spinning up postgres. Bump-Type: minor Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
203 lines
5.8 KiB
Go
203 lines
5.8 KiB
Go
package mcp
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
"sort"
|
|
"strings"
|
|
|
|
"github.com/mathiasbq/hyperguild/ingestion/internal/search"
|
|
)
|
|
|
|
// brainContextArgs carries the decoded JSON arguments of the brain_context
// tool call.
type brainContextArgs struct {
	// ProjectRoot is the path of the project to find context for. It is
	// mandatory; its basename is used as the search query.
	ProjectRoot string `json:"project_root"`

	// RecentFiles optionally lists recently touched files. Entries whose
	// doc path shares a basename with one of them are ranked higher.
	RecentFiles []string `json:"recent_files,omitempty"`

	// Limit caps how many entries are returned. Zero or a negative value
	// selects the default of 10.
	Limit int `json:"limit,omitempty"`
}
|
|
|
|
// contextEntry describes a single brain entry in the brain_context
// response. Entries are ranked by Score before the result set is cut down
// to the requested limit.
type contextEntry struct {
	// Slug identifies the entry and is the key used for deduplication.
	Slug string `json:"slug"`

	// Title is the entry's title as reported by the search hit or graph node.
	Title string `json:"title"`

	// DocPath is the entry's document path as reported by its source.
	DocPath string `json:"doc_path"`

	// Excerpt is a short, frontmatter-stripped preview of the document body.
	Excerpt string `json:"excerpt"`

	// EdgeType records how the entry was first discovered: "bm25" for a
	// search hit, "graph" for a neighbour found through graph expansion.
	EdgeType string `json:"edge_type"`

	// Score is the final ranking score; higher sorts first.
	Score float64 `json:"score"`
}
|
|
|
|
// brainContext returns top-N brain entries relevant to a project context.
|
|
// It runs a BM25 query against the project name, takes the top-3 hits as
|
|
// seeds, expands each seed 2 hops in the brain graph (when configured),
|
|
// then merges and deduplicates by slug. recent_files optionally boosts
|
|
// entries whose doc_path matches a recent file basename.
|
|
func (s *Server) brainContext(ctx context.Context, args json.RawMessage) (json.RawMessage, error) {
|
|
var a brainContextArgs
|
|
if err := json.Unmarshal(args, &a); err != nil {
|
|
return nil, fmt.Errorf("parse args: %w", err)
|
|
}
|
|
if a.ProjectRoot == "" {
|
|
return nil, fmt.Errorf("project_root is required")
|
|
}
|
|
limit := a.Limit
|
|
if limit <= 0 {
|
|
limit = 10
|
|
}
|
|
|
|
projectName := filepath.Base(strings.TrimRight(a.ProjectRoot, "/"))
|
|
if projectName == "" || projectName == "." || projectName == "/" {
|
|
return nil, fmt.Errorf("project_root has no usable basename: %q", a.ProjectRoot)
|
|
}
|
|
|
|
// Seed BM25 hits on the project name. Take top-3 as graph expansion seeds.
|
|
bm25, err := search.QueryContext(ctx, s.brainDir, search.QueryOptions{
|
|
Query: projectName,
|
|
Limit: 3,
|
|
Vector: s.vector,
|
|
Embedder: s.embedder,
|
|
})
|
|
if err != nil {
|
|
return nil, fmt.Errorf("search: %w", err)
|
|
}
|
|
|
|
// Dedup by slug while merging BM25 hits and graph neighbours.
|
|
bySlug := make(map[string]*contextEntry)
|
|
// BM25 score: highest rank gets the largest score, decaying linearly.
|
|
// Score 3.0 / 2.0 / 1.0 for ranks 0/1/2 respectively.
|
|
for i, r := range bm25 {
|
|
slug := slugFromPath(r.Path)
|
|
if slug == "" {
|
|
continue
|
|
}
|
|
score := float64(len(bm25) - i)
|
|
bySlug[slug] = &contextEntry{
|
|
Slug: slug,
|
|
Title: r.Title,
|
|
DocPath: r.Path,
|
|
Excerpt: truncateExcerpt(r.Excerpt, 200),
|
|
EdgeType: "bm25",
|
|
Score: score,
|
|
}
|
|
}
|
|
|
|
// Graph expansion: for each BM25 hit, fetch its 2-hop subgraph and
|
|
// merge those neighbours in with a graph score that decays with hop
|
|
// distance. Failures are silently dropped — graph augmentation is
|
|
// best-effort.
|
|
if reader, ok := s.graph.(graphReader); ok {
|
|
for _, r := range bm25 {
|
|
seed := slugFromPath(r.Path)
|
|
if seed == "" {
|
|
continue
|
|
}
|
|
ns, gerr := reader.Subgraph(ctx, seed, 2)
|
|
if gerr != nil {
|
|
continue
|
|
}
|
|
for _, n := range ns {
|
|
if n.Slug == "" || n.Slug == seed {
|
|
continue
|
|
}
|
|
// Graph score: closer hops carry more signal. Distance 1
|
|
// scores 0.6, distance 2 scores 0.3.
|
|
gscore := 0.6 / float64(max1(n.Distance))
|
|
if existing, ok := bySlug[n.Slug]; ok {
|
|
// Already surfaced via BM25 — bump its score so that
|
|
// BM25 + graph evidence outranks BM25-only hits.
|
|
existing.Score += gscore
|
|
continue
|
|
}
|
|
bySlug[n.Slug] = &contextEntry{
|
|
Slug: n.Slug,
|
|
Title: n.Title,
|
|
DocPath: n.DocPath,
|
|
Excerpt: readExcerpt(s.brainDir, n.DocPath, 200),
|
|
EdgeType: "graph",
|
|
Score: gscore,
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Optional recent_files boost: +1 to entries whose doc_path basename
|
|
// matches any recent file basename. v1 is intentionally simple.
|
|
if len(a.RecentFiles) > 0 {
|
|
recent := make(map[string]struct{}, len(a.RecentFiles))
|
|
for _, f := range a.RecentFiles {
|
|
recent[filepath.Base(f)] = struct{}{}
|
|
}
|
|
for _, e := range bySlug {
|
|
if _, hit := recent[filepath.Base(e.DocPath)]; hit {
|
|
e.Score += 1.0
|
|
}
|
|
}
|
|
}
|
|
|
|
// Flatten and sort by score desc, slug asc as a stable tiebreaker.
|
|
entries := make([]contextEntry, 0, len(bySlug))
|
|
for _, e := range bySlug {
|
|
entries = append(entries, *e)
|
|
}
|
|
sort.SliceStable(entries, func(i, j int) bool {
|
|
if entries[i].Score != entries[j].Score {
|
|
return entries[i].Score > entries[j].Score
|
|
}
|
|
return entries[i].Slug < entries[j].Slug
|
|
})
|
|
if len(entries) > limit {
|
|
entries = entries[:limit]
|
|
}
|
|
|
|
return json.Marshal(map[string]any{"entries": entries})
|
|
}
|
|
|
|
// truncateExcerpt clamps an already-stripped excerpt to at most maxLen
// characters (Unicode code points) without re-running the frontmatter
// parser. When the excerpt is shortened, a single "…" is appended; an
// excerpt that already fits is returned unchanged.
//
// Truncation happens on a rune boundary, so a multi-byte UTF-8 character is
// never cut in half. A negative maxLen is treated as 0.
func truncateExcerpt(s string, maxLen int) string {
	if maxLen < 0 {
		maxLen = 0
	}
	// Fast path: a string of at most maxLen bytes has at most maxLen runes.
	if len(s) <= maxLen {
		return s
	}
	// Ranging over a string yields the byte offset of each rune, which
	// gives the cut point for the first maxLen runes.
	seen := 0
	for i := range s {
		if seen == maxLen {
			return s[:i] + "…"
		}
		seen++
	}
	// More than maxLen bytes but no more than maxLen runes: nothing to cut.
	return s
}
|
|
|
|
// readExcerpt loads the document at relPath (slash-separated, relative to
// brainDir), removes a leading "---"-delimited frontmatter block if the
// document opens with one, and returns at most maxLen characters (Unicode
// code points) of the remaining body, followed by "…" when it was
// shortened.
//
// It returns "" when relPath is empty or the file cannot be read: the
// excerpt is informational, not load-bearing for correctness. A negative
// maxLen is treated as 0.
func readExcerpt(brainDir, relPath string, maxLen int) string {
	if relPath == "" {
		return ""
	}
	raw, err := os.ReadFile(filepath.Join(brainDir, filepath.FromSlash(relPath)))
	if err != nil {
		// Best-effort: a missing or unreadable doc just has no excerpt.
		return ""
	}

	body := string(raw)
	// Only treat the text between the first two "---" markers as
	// frontmatter when nothing but whitespace (or a BOM) precedes the first
	// marker. Otherwise a document without frontmatter that merely uses
	// "---" horizontal rules would lose everything before the second rule.
	if parts := strings.SplitN(body, "---", 3); len(parts) == 3 &&
		strings.TrimSpace(strings.TrimPrefix(parts[0], "\ufeff")) == "" {
		body = strings.TrimSpace(parts[2])
	}

	if maxLen < 0 {
		maxLen = 0
	}
	// Fast path: a body of at most maxLen bytes has at most maxLen runes.
	if len(body) <= maxLen {
		return body
	}
	// Cut on a rune boundary so multi-byte UTF-8 characters stay intact.
	seen := 0
	for i := range body {
		if seen == maxLen {
			return body[:i] + "…"
		}
		seen++
	}
	return body
}
|
|
|
|
// max1 clamps n to a lower bound of 1. Callers use it as a divisor for
// graph distances, so a distance of zero (a self-reference) or a negative
// value never causes a division by zero or a sign flip.
func max1(n int) int {
	if n >= 1 {
		return n
	}
	return 1
}
|