diff --git a/ingestion/internal/mcp/handlers.go b/ingestion/internal/mcp/handlers.go
index 1514785..573cd06 100644
--- a/ingestion/internal/mcp/handlers.go
+++ b/ingestion/internal/mcp/handlers.go
@@ -122,6 +122,19 @@ func (s *Server) tools() []map[string]any {
 				"depth": int_("max traversal depth for op=subgraph (default 2, clamped to 6) and op=path (default 4, clamped to 8)"),
 			}),
 		},
+		{
+			"name":        "brain_context",
+			"description": "Return top-N relevant brain entries for a project context. Use at session start or before a complex task to load prior decisions, corrections, and surprises.",
+			"inputSchema": schema([]string{"project_root"}, map[string]any{
+				"project_root": str("absolute path to the project root"),
+				"recent_files": map[string]any{
+					"type":        "array",
+					"items":       map[string]any{"type": "string"},
+					"description": "optional: recent file paths in the project to bias relevance",
+				},
+				"limit": int_("max entries to return, default 10"),
+			}),
+		},
 		{
 			"name":        "session_log",
 			"description": "Append a structured entry to brain/sessions/.jsonl.",
diff --git a/ingestion/internal/mcp/server.go b/ingestion/internal/mcp/server.go
index c10d5f0..0ba1cc0 100644
--- a/ingestion/internal/mcp/server.go
+++ b/ingestion/internal/mcp/server.go
@@ -1,6 +1,7 @@
 // Package mcp implements an MCP HTTP handler for the ingestion service.
 // Exposed tools: brain_query, brain_write, brain_index, brain_tunnel,
-// brain_ingest, brain_ingest_raw, brain_answer, brain_classify, session_log.
+// brain_ingest, brain_ingest_raw, brain_answer, brain_classify,
+// brain_graph, brain_context, session_log.
 package mcp
 
 import (
@@ -192,6 +193,8 @@ func (s *Server) handleCall(ctx context.Context, name string, args json.RawMessa
 		return s.brainClassify(ctx, args)
 	case "brain_graph":
 		return s.brainGraph(ctx, args)
+	case "brain_context":
+		return s.brainContext(ctx, args)
 	default:
 		return nil, fmt.Errorf("unknown tool: %s", name)
 	}
diff --git a/ingestion/internal/mcp/server_test.go b/ingestion/internal/mcp/server_test.go
index 6727807..4baf35e 100644
--- a/ingestion/internal/mcp/server_test.go
+++ b/ingestion/internal/mcp/server_test.go
@@ -57,7 +57,8 @@ func TestServerToolsList(t *testing.T) {
 	assert.ElementsMatch(t, []string{
 		"brain_query", "brain_write", "brain_index", "brain_tunnel",
 		"brain_ingest_raw", "brain_ingest",
-		"brain_answer", "brain_classify", "brain_graph", "session_log",
+		"brain_answer", "brain_classify", "brain_graph", "brain_context",
+		"session_log",
 	}, names)
 }
 
diff --git a/ingestion/internal/mcp/tools_context.go b/ingestion/internal/mcp/tools_context.go
new file mode 100644
index 0000000..14c7f9d
--- /dev/null
+++ b/ingestion/internal/mcp/tools_context.go
@@ -0,0 +1,202 @@
+package mcp
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"os"
+	"path/filepath"
+	"sort"
+	"strings"
+
+	"github.com/mathiasbq/hyperguild/ingestion/internal/search"
+)
+
+// brainContextArgs is the input shape of brain_context. project_root is
+// required; recent_files biases ranking when provided; limit caps the
+// returned set (a missing or non-positive limit falls back to 10).
+type brainContextArgs struct {
+	ProjectRoot string   `json:"project_root"`
+	RecentFiles []string `json:"recent_files,omitempty"`
+	Limit       int      `json:"limit,omitempty"`
+}
+
+// contextEntry is one returned brain entry: slug, title, doc_path, a
+// frontmatter-stripped excerpt, edge_type ("bm25" or "graph"), and the
+// final score used for ranking before truncation to Limit.
+type contextEntry struct {
+	Slug     string  `json:"slug"`
+	Title    string  `json:"title"`
+	DocPath  string  `json:"doc_path"`
+	Excerpt  string  `json:"excerpt"`
+	EdgeType string  `json:"edge_type"`
+	Score    float64 `json:"score"`
+}
+
+// brainContext returns top-N brain entries relevant to a project context.
+// It runs a search query on the project_root basename, takes the top-3
+// hits as seeds, expands each seed 2 hops in the brain graph (when
+// configured), then merges and deduplicates by slug. recent_files boosts
+// entries whose doc_path basename matches a recent file basename.
+func (s *Server) brainContext(ctx context.Context, args json.RawMessage) (json.RawMessage, error) {
+	var a brainContextArgs
+	if err := json.Unmarshal(args, &a); err != nil {
+		return nil, fmt.Errorf("parse args: %w", err)
+	}
+	if a.ProjectRoot == "" {
+		return nil, fmt.Errorf("project_root is required")
+	}
+	limit := a.Limit
+	if limit <= 0 {
+		limit = 10
+	}
+
+	projectName := filepath.Base(strings.TrimRight(a.ProjectRoot, "/"))
+	if projectName == "" || projectName == "." || projectName == "/" {
+		return nil, fmt.Errorf("project_root has no usable basename: %q", a.ProjectRoot)
+	}
+
+	// Seed BM25 hits on the project name. Take top-3 as graph expansion seeds.
+	bm25, err := search.QueryContext(ctx, s.brainDir, search.QueryOptions{
+		Query:    projectName,
+		Limit:    3,
+		Vector:   s.vector,
+		Embedder: s.embedder,
+	})
+	if err != nil {
+		return nil, fmt.Errorf("search: %w", err)
+	}
+
+	// Merge BM25 hits and graph neighbours by slug. The BM25 score decays
+	// linearly with rank: len(bm25) for the best hit down to 1 for the last.
+	bySlug := make(map[string]*contextEntry)
+	for i, r := range bm25 {
+		slug := slugFromPath(r.Path)
+		if slug == "" {
+			continue
+		}
+		if _, dup := bySlug[slug]; dup { // keep the better-ranked hit for a shared slug
+			continue
+		}
+		bySlug[slug] = &contextEntry{
+			Slug:     slug,
+			Title:    r.Title,
+			DocPath:  r.Path,
+			Excerpt:  truncateExcerpt(r.Excerpt, 200),
+			EdgeType: "bm25",
+			Score:    float64(len(bm25) - i),
+		}
+	}
+
+	// Graph expansion: merge each hit's 2-hop subgraph, scoring neighbours
+	// by hop distance. Lookup failures are silently dropped — graph
+	// augmentation is best-effort.
+	if reader, ok := s.graph.(graphReader); ok {
+		for _, r := range bm25 {
+			seed := slugFromPath(r.Path)
+			if seed == "" {
+				continue
+			}
+			ns, gerr := reader.Subgraph(ctx, seed, 2)
+			if gerr != nil {
+				continue
+			}
+			for _, n := range ns {
+				if n.Slug == "" || n.Slug == seed {
+					continue
+				}
+				// Graph score: closer hops carry more signal. Distance 1
+				// scores 0.6, distance 2 scores 0.3.
+				gscore := 0.6 / float64(max1(n.Distance))
+				if existing, ok := bySlug[n.Slug]; ok {
+					// Already surfaced via BM25 — bump its score so that
+					// BM25 + graph evidence outranks BM25-only hits.
+					existing.Score += gscore
+					continue
+				}
+				bySlug[n.Slug] = &contextEntry{
+					Slug:     n.Slug,
+					Title:    n.Title,
+					DocPath:  n.DocPath,
+					Excerpt:  readExcerpt(s.brainDir, n.DocPath, 200),
+					EdgeType: "graph",
+					Score:    gscore,
+				}
+			}
+		}
+	}
+
+	// Optional recent_files boost: +1 to entries whose doc_path basename
+	// matches any recent file basename. v1 is intentionally simple.
+	if len(a.RecentFiles) > 0 {
+		recent := make(map[string]struct{}, len(a.RecentFiles))
+		for _, f := range a.RecentFiles {
+			recent[filepath.Base(f)] = struct{}{}
+		}
+		for _, e := range bySlug {
+			if _, hit := recent[filepath.Base(e.DocPath)]; hit {
+				e.Score += 1.0
+			}
+		}
+	}
+
+	// Flatten and sort by score desc, slug asc as a stable tiebreaker.
+	entries := make([]contextEntry, 0, len(bySlug))
+	for _, e := range bySlug {
+		entries = append(entries, *e)
+	}
+	sort.SliceStable(entries, func(i, j int) bool {
+		if entries[i].Score != entries[j].Score {
+			return entries[i].Score > entries[j].Score
+		}
+		return entries[i].Slug < entries[j].Slug
+	})
+	if len(entries) > limit {
+		entries = entries[:limit]
+	}
+
+	return json.Marshal(map[string]any{"entries": entries})
+}
+
+// truncateExcerpt clamps an already-stripped excerpt to maxLen characters
+// (runes), appending "…" when it cuts. Cutting on a rune boundary keeps
+// multi-byte text valid UTF-8 instead of splitting a character in half.
+func truncateExcerpt(s string, maxLen int) string {
+	n := 0
+	for i := range s { // i only ever lands on the first byte of a rune
+		if n++; n > maxLen {
+			return s[:i] + "…"
+		}
+	}
+	return s
+}
+
+// readExcerpt loads a doc relative to brainDir, strips frontmatter (only
+// when the file opens with "---"), and returns the first maxLen chars.
+// Returns "" on any error — the excerpt is informational, not load-bearing.
+func readExcerpt(brainDir, relPath string, maxLen int) string {
+	if relPath == "" {
+		return ""
+	}
+	content, err := os.ReadFile(filepath.Join(brainDir, filepath.FromSlash(relPath)))
+	if err != nil {
+		return ""
+	}
+	body := string(content)
+	if strings.HasPrefix(strings.TrimLeft(body, "\ufeff \t\r\n"), "---") {
+		if parts := strings.SplitN(body, "---", 3); len(parts) == 3 {
+			body = strings.TrimSpace(parts[2])
+		}
+	}
+	return truncateExcerpt(body, maxLen)
+}
+
+// max1 returns the maximum of n and 1, used to guard against divide-by-zero
+// on graph distance and to give self-references (distance 0) a sensible
+// score instead of an infinity.
+func max1(n int) int {
+	if n < 1 {
+		return 1
+	}
+	return n
+}
diff --git a/ingestion/internal/mcp/tools_context_test.go b/ingestion/internal/mcp/tools_context_test.go
new file mode 100644
index 0000000..e179f83
--- /dev/null
+++ b/ingestion/internal/mcp/tools_context_test.go
@@ -0,0 +1,212 @@
+package mcp
+
+import (
+	"context"
+	"encoding/json"
+	"os"
+	"path/filepath"
+	"sort"
+	"testing"
+
+	"github.com/mathiasbq/hyperguild/ingestion/internal/graph"
+	"github.com/mathiasbq/hyperguild/ingestion/internal/graphstore"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// fakeGraph implements graphsync.Store + graphReader so it can be
+// assigned to Server.graph and downcast by brainContext. Subgraph and
+// Neighbors serve the canned subgraph map; every other method is a no-op.
+type fakeGraph struct {
+	subgraph map[string][]graphstore.Neighbor
+}
+
+func (f *fakeGraph) UpsertEntity(_ context.Context, _ graph.Entity) error { return nil }
+func (f *fakeGraph) ReplaceEdgesForDoc(_ context.Context, _ string, _ []graph.Edge) error {
+	return nil
+}
+func (f *fakeGraph) DeleteByDoc(_ context.Context, _ string) error { return nil }
+
+func (f *fakeGraph) Neighbors(_ context.Context, slug, _ string, _ int) ([]graphstore.Neighbor, error) {
+	return f.subgraph[slug], nil
+}
+
+func (f *fakeGraph) Subgraph(_ context.Context, origin string, _ int) ([]graphstore.Neighbor, error) {
+	return f.subgraph[origin], nil
+}
+
+func (f *fakeGraph) Path(_ context.Context, _, _ string, _ int) ([]graphstore.PathStep, error) {
+	return nil, nil
+}
+
+func writeNote(t *testing.T, brainDir, relPath, title, body string) {
+	t.Helper()
+	full := filepath.Join(brainDir, filepath.FromSlash(relPath))
+	require.NoError(t, os.MkdirAll(filepath.Dir(full), 0o755))
+	content := "---\ntitle: " + title + "\n---\n\n" + body
+	require.NoError(t, os.WriteFile(full, []byte(content), 0o644))
+}
+
+// callContext runs brainContext directly and decodes its JSON response; any error fails the test.
+func callContext(t *testing.T, s *Server, args map[string]any) map[string]any {
+	t.Helper()
+	raw, err := json.Marshal(args)
+	require.NoError(t, err)
+	out, err := s.brainContext(context.Background(), raw)
+	require.NoError(t, err)
+	var resp map[string]any
+	require.NoError(t, json.Unmarshal(out, &resp))
+	return resp
+}
+
+func sortedSlugs(entries []any) []string {
+	slugs := make([]string, 0, len(entries))
+	for _, e := range entries {
+		slugs = append(slugs, e.(map[string]any)["slug"].(string))
+	}
+	sort.Strings(slugs)
+	return slugs
+}
+
+func TestBrainContext_RejectsMissingProjectRoot(t *testing.T) {
+	s := NewServer(t.TempDir(), nil, nil, nil)
+	_, err := s.brainContext(context.Background(), json.RawMessage(`{}`))
+	assert.Error(t, err)
+}
+
+func TestBrainContext_RejectsUnusableBasename(t *testing.T) {
+	s := NewServer(t.TempDir(), nil, nil, nil)
+	_, err := s.brainContext(context.Background(), json.RawMessage(`{"project_root":"/"}`))
+	assert.Error(t, err)
+}
+
+func TestBrainContext_BM25Only_NoGraph(t *testing.T) {
+	brainDir := t.TempDir()
+	// Two notes whose body contains the hyphenated project name verbatim
+	// ("azure-tiger", not "Azure tiger"). NOTE(review): this presumes the
+	// search package matches the hyphenated token literally — confirm there.
+	writeNote(t, brainDir, "wiki/finance/decisions/azure-tiger-routing.md",
+		"Azure Tiger Routing", "azure-tiger payment routing decisions.")
+	writeNote(t, brainDir, "wiki/finance/facts/iso20022.md",
+		"Azure Tiger ISO 20022 fields", "azure-tiger maps invoice fields to ISO 20022.")
+
+	s := NewServer(brainDir, nil, nil, nil)
+	// graph is nil — only BM25 hits should appear.
+
+	resp := callContext(t, s, map[string]any{
+		"project_root": "/home/mathias/dev/QKX/azure-tiger",
+	})
+	entries := resp["entries"].([]any)
+	require.NotEmpty(t, entries, "expected at least one BM25 hit on project name")
+
+	for _, e := range entries {
+		entry := e.(map[string]any)
+		assert.Equal(t, "bm25", entry["edge_type"], "no graph configured, every entry must be BM25")
+		assert.NotEmpty(t, entry["slug"])
+		assert.NotEmpty(t, entry["doc_path"])
+	}
+}
+
+func TestBrainContext_BM25PlusGraphExpansion(t *testing.T) {
+	brainDir := t.TempDir()
+	// BM25 seed — body carries the hyphenated project name verbatim.
+	writeNote(t, brainDir, "wiki/finance/decisions/azure-tiger-routing.md",
+		"Azure Tiger Routing", "azure-tiger payment routing decisions.")
+	// Graph neighbour — does NOT match BM25 on "azure-tiger" so it can
+	// only arrive via the graph subgraph traversal.
+	writeNote(t, brainDir, "wiki/finance/facts/sepa-clearing.md",
+		"SEPA Clearing", "SEPA payment clearing rules and timing windows.")
+
+	graphFake := &fakeGraph{
+		subgraph: map[string][]graphstore.Neighbor{
+			"azure-tiger-routing": {
+				{
+					Slug:     "sepa-clearing",
+					Title:    "SEPA Clearing",
+					DocPath:  "wiki/finance/facts/sepa-clearing.md",
+					EdgeType: "wikilink",
+					Distance: 1,
+				},
+			},
+		},
+	}
+	s := NewServer(brainDir, nil, nil, nil)
+	s.graph = graphFake
+
+	resp := callContext(t, s, map[string]any{
+		"project_root": "/home/mathias/dev/QKX/azure-tiger",
+	})
+	entries := resp["entries"].([]any)
+	require.GreaterOrEqual(t, len(entries), 2, "expected BM25 seed plus graph neighbour")
+
+	slugs := sortedSlugs(entries)
+	assert.Contains(t, slugs, "azure-tiger-routing", "BM25 seed must appear")
+	assert.Contains(t, slugs, "sepa-clearing", "graph neighbour must appear")
+
+	// Verify the graph-only entry carries edge_type="graph".
+	var sepaEntry map[string]any
+	for _, e := range entries {
+		m := e.(map[string]any)
+		if m["slug"] == "sepa-clearing" {
+			sepaEntry = m
+			break
+		}
+	}
+	require.NotNil(t, sepaEntry)
+	assert.Equal(t, "graph", sepaEntry["edge_type"])
+	assert.NotEmpty(t, sepaEntry["excerpt"], "excerpt should be loaded from disk for graph neighbours")
+}
+
+func TestBrainContext_LimitClamps(t *testing.T) {
+	brainDir := t.TempDir()
+	// Five notes all matching "azure-tiger" — more than the limit of 2 requested below.
+	for i, name := range []string{"a", "b", "c", "d", "e"} {
+		writeNote(t, brainDir,
+			"wiki/finance/decisions/azure-tiger-"+name+".md",
+			"Azure Tiger "+name,
+			"azure-tiger note "+name+" with index "+string(rune('0'+i)))
+	}
+	s := NewServer(brainDir, nil, nil, nil)
+	resp := callContext(t, s, map[string]any{
+		"project_root": "/home/mathias/dev/QKX/azure-tiger",
+		"limit":        2,
+	})
+	entries := resp["entries"].([]any)
+	assert.LessOrEqual(t, len(entries), 2)
+}
+
+func TestBrainContext_RecentFilesBoost(t *testing.T) {
+	brainDir := t.TempDir()
+	// Both notes BM25-match the project name, but azure-tiger-z has
+	// twice the term frequency so it naturally ranks above azure-tiger-a.
+	// The recent_files boost on azure-tiger-a should pull it level on
+	// score; the alphabetical slug tiebreaker (a < z) then promotes it
+	// to the top — exercising both the boost and the deterministic
+	// tiebreak.
+	writeNote(t, brainDir, "wiki/finance/decisions/azure-tiger-a.md",
+		"A", "azure-tiger note about a.")
+	writeNote(t, brainDir, "wiki/finance/decisions/azure-tiger-z.md",
+		"Z", "azure-tiger azure-tiger note about z.")
+
+	s := NewServer(brainDir, nil, nil, nil)
+
+	// Baseline ranking: azure-tiger-z must lead (higher term frequency).
+	baseline := callContext(t, s, map[string]any{
+		"project_root": "/home/mathias/dev/QKX/azure-tiger",
+	})
+	baselineEntries := baseline["entries"].([]any)
+	require.GreaterOrEqual(t, len(baselineEntries), 2)
+	baselineTop := baselineEntries[0].(map[string]any)
+	require.Equal(t, "azure-tiger-z", baselineTop["slug"],
+		"sanity: higher tf must rank first without a boost")
+
+	// With boost on azure-tiger-a — boosted entry must now lead.
+	boosted := callContext(t, s, map[string]any{
+		"project_root": "/home/mathias/dev/QKX/azure-tiger",
+		"recent_files": []string{"/some/where/azure-tiger-a.md"},
+	})
+	entries := boosted["entries"].([]any)
+	require.GreaterOrEqual(t, len(entries), 2)
+	top := entries[0].(map[string]any)
+	assert.Equal(t, "azure-tiger-a", top["slug"], "recent_files boost must promote the matching doc")
+}