Returns top-N relevant brain entries for a project context. Combines BM25 hits on project name with 2-hop graph expansion via Track A's graphstore (when BRAIN_GRAPH_ENABLED). Closes hyperguild#28.

Notes on implementation choices that deviate slightly from the spec:

- Excerpt length: 200 chars per spec (vs the 300 used by search.Result). truncateExcerpt clamps the already-stripped BM25 excerpt; graph-only neighbours load their excerpt from disk via a private readExcerpt helper (search.hydrate is unexported).
- Graph scoring: 0.6 / max(1, distance) per neighbour, so distance-1 contributes 0.6 and distance-2 contributes 0.3. BM25 hits decay linearly from 3.0 (rank-0) to 1.0 (rank-2), giving BM25 hits a natural ceiling above pure-graph hits while still letting a doc surfaced via both edge types outrank a BM25-only one.
- Test placement: package mcp (internal) rather than mcp_test, because graphReader is unexported and WithGraph only accepts *PGStore; an internal test can install a dual-interface fake directly on s.graph without spinning up postgres.

Bump-Type: minor
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
213 lines
7.4 KiB
Go
213 lines
7.4 KiB
Go
package mcp
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"os"
|
|
"path/filepath"
|
|
"sort"
|
|
"testing"
|
|
|
|
"github.com/mathiasbq/hyperguild/ingestion/internal/graph"
|
|
"github.com/mathiasbq/hyperguild/ingestion/internal/graphstore"
|
|
"github.com/stretchr/testify/assert"
|
|
"github.com/stretchr/testify/require"
|
|
)
|
|
|
|
// fakeGraph implements graphsync.Store + graphReader so it can be
|
|
// assigned to Server.graph and downcast by brainContext. Only Subgraph
|
|
// is exercised by brain_context today; the rest are no-op satisfiers.
|
|
type fakeGraph struct {
|
|
subgraph map[string][]graphstore.Neighbor
|
|
}
|
|
|
|
// UpsertEntity is a no-op satisfier: it ignores its arguments and always
// reports success.
func (f *fakeGraph) UpsertEntity(context.Context, graph.Entity) error {
	return nil
}
|
|
func (f *fakeGraph) ReplaceEdgesForDoc(_ context.Context, _ string, _ []graph.Edge) error {
|
|
return nil
|
|
}
|
|
// DeleteByDoc is a no-op satisfier: it ignores its arguments and always
// reports success.
func (f *fakeGraph) DeleteByDoc(context.Context, string) error {
	return nil
}
|
|
|
|
func (f *fakeGraph) Neighbors(_ context.Context, slug, _ string, _ int) ([]graphstore.Neighbor, error) {
|
|
return f.subgraph[slug], nil
|
|
}
|
|
|
|
func (f *fakeGraph) Subgraph(_ context.Context, origin string, _ int) ([]graphstore.Neighbor, error) {
|
|
return f.subgraph[origin], nil
|
|
}
|
|
|
|
func (f *fakeGraph) Path(_ context.Context, _, _ string, _ int) ([]graphstore.PathStep, error) {
|
|
return nil, nil
|
|
}
|
|
|
|
func writeNote(t *testing.T, brainDir, relPath, title, body string) {
|
|
t.Helper()
|
|
full := filepath.Join(brainDir, filepath.FromSlash(relPath))
|
|
require.NoError(t, os.MkdirAll(filepath.Dir(full), 0o755))
|
|
content := "---\ntitle: " + title + "\n---\n\n" + body
|
|
require.NoError(t, os.WriteFile(full, []byte(content), 0o644))
|
|
}
|
|
|
|
// callContext runs brainContext directly and decodes the JSON response.
|
|
func callContext(t *testing.T, s *Server, args map[string]any) map[string]any {
|
|
t.Helper()
|
|
raw, err := json.Marshal(args)
|
|
require.NoError(t, err)
|
|
out, err := s.brainContext(context.Background(), raw)
|
|
require.NoError(t, err)
|
|
var resp map[string]any
|
|
require.NoError(t, json.Unmarshal(out, &resp))
|
|
return resp
|
|
}
|
|
|
|
// sortedSlugs extracts the "slug" field from every decoded response entry and
// returns the slugs in ascending lexical order.
//
// Each element of entries must be a JSON object (map[string]any) carrying a
// string "slug" field. The helper has no *testing.T to report through, so a
// malformed entry still panics — but with a message naming the problem rather
// than a bare interface-conversion runtime error.
func sortedSlugs(entries []any) []string {
	slugs := make([]string, 0, len(entries))
	for _, e := range entries {
		obj, ok := e.(map[string]any)
		if !ok {
			panic("sortedSlugs: entry is not a JSON object")
		}
		slug, ok := obj["slug"].(string)
		if !ok {
			panic(`sortedSlugs: entry has no string "slug" field`)
		}
		slugs = append(slugs, slug)
	}
	sort.Strings(slugs)
	return slugs
}
|
|
|
|
func TestBrainContext_RejectsMissingProjectRoot(t *testing.T) {
|
|
s := NewServer(t.TempDir(), nil, nil, nil)
|
|
_, err := s.brainContext(context.Background(), json.RawMessage(`{}`))
|
|
assert.Error(t, err)
|
|
}
|
|
|
|
func TestBrainContext_RejectsUnusableBasename(t *testing.T) {
|
|
s := NewServer(t.TempDir(), nil, nil, nil)
|
|
_, err := s.brainContext(context.Background(), json.RawMessage(`{"project_root":"/"}`))
|
|
assert.Error(t, err)
|
|
}
|
|
|
|
func TestBrainContext_BM25Only_NoGraph(t *testing.T) {
|
|
brainDir := t.TempDir()
|
|
// Two notes whose body contains the hyphenated project name. BM25
|
|
// uses literal substring matching after whitespace tokenisation, so
|
|
// the bodies must carry "azure-tiger" verbatim, not "Azure tiger".
|
|
writeNote(t, brainDir, "wiki/finance/decisions/azure-tiger-routing.md",
|
|
"Azure Tiger Routing", "azure-tiger payment routing decisions.")
|
|
writeNote(t, brainDir, "wiki/finance/facts/iso20022.md",
|
|
"Azure Tiger ISO 20022 fields", "azure-tiger maps invoice fields to ISO 20022.")
|
|
|
|
s := NewServer(brainDir, nil, nil, nil)
|
|
// graph is nil — only BM25 hits should appear.
|
|
|
|
resp := callContext(t, s, map[string]any{
|
|
"project_root": "/home/mathias/dev/QKX/azure-tiger",
|
|
})
|
|
entries := resp["entries"].([]any)
|
|
require.NotEmpty(t, entries, "expected at least one BM25 hit on project name")
|
|
|
|
for _, e := range entries {
|
|
entry := e.(map[string]any)
|
|
assert.Equal(t, "bm25", entry["edge_type"], "no graph configured, every entry must be BM25")
|
|
assert.NotEmpty(t, entry["slug"])
|
|
assert.NotEmpty(t, entry["doc_path"])
|
|
}
|
|
}
|
|
|
|
func TestBrainContext_BM25PlusGraphExpansion(t *testing.T) {
|
|
brainDir := t.TempDir()
|
|
// BM25 seed — body carries the hyphenated project name verbatim.
|
|
writeNote(t, brainDir, "wiki/finance/decisions/azure-tiger-routing.md",
|
|
"Azure Tiger Routing", "azure-tiger payment routing decisions.")
|
|
// Graph neighbour — does NOT match BM25 on "azure-tiger" so it can
|
|
// only arrive via the graph subgraph traversal.
|
|
writeNote(t, brainDir, "wiki/finance/facts/sepa-clearing.md",
|
|
"SEPA Clearing", "SEPA payment clearing rules and timing windows.")
|
|
|
|
graphFake := &fakeGraph{
|
|
subgraph: map[string][]graphstore.Neighbor{
|
|
"azure-tiger-routing": {
|
|
{
|
|
Slug: "sepa-clearing",
|
|
Title: "SEPA Clearing",
|
|
DocPath: "wiki/finance/facts/sepa-clearing.md",
|
|
EdgeType: "wikilink",
|
|
Distance: 1,
|
|
},
|
|
},
|
|
},
|
|
}
|
|
s := NewServer(brainDir, nil, nil, nil)
|
|
s.graph = graphFake
|
|
|
|
resp := callContext(t, s, map[string]any{
|
|
"project_root": "/home/mathias/dev/QKX/azure-tiger",
|
|
})
|
|
entries := resp["entries"].([]any)
|
|
require.GreaterOrEqual(t, len(entries), 2, "expected BM25 seed plus graph neighbour")
|
|
|
|
slugs := sortedSlugs(entries)
|
|
assert.Contains(t, slugs, "azure-tiger-routing", "BM25 seed must appear")
|
|
assert.Contains(t, slugs, "sepa-clearing", "graph neighbour must appear")
|
|
|
|
// Verify the graph-only entry carries edge_type="graph".
|
|
var sepaEntry map[string]any
|
|
for _, e := range entries {
|
|
m := e.(map[string]any)
|
|
if m["slug"] == "sepa-clearing" {
|
|
sepaEntry = m
|
|
break
|
|
}
|
|
}
|
|
require.NotNil(t, sepaEntry)
|
|
assert.Equal(t, "graph", sepaEntry["edge_type"])
|
|
assert.NotEmpty(t, sepaEntry["excerpt"], "excerpt should be loaded from disk for graph neighbours")
|
|
}
|
|
|
|
func TestBrainContext_LimitClamps(t *testing.T) {
|
|
brainDir := t.TempDir()
|
|
// Five notes all matching "azure-tiger".
|
|
for i, name := range []string{"a", "b", "c", "d", "e"} {
|
|
writeNote(t, brainDir,
|
|
"wiki/finance/decisions/azure-tiger-"+name+".md",
|
|
"Azure Tiger "+name,
|
|
"azure-tiger note "+name+" with index "+string(rune('0'+i)))
|
|
}
|
|
s := NewServer(brainDir, nil, nil, nil)
|
|
resp := callContext(t, s, map[string]any{
|
|
"project_root": "/home/mathias/dev/QKX/azure-tiger",
|
|
"limit": 2,
|
|
})
|
|
entries := resp["entries"].([]any)
|
|
assert.LessOrEqual(t, len(entries), 2)
|
|
}
|
|
|
|
func TestBrainContext_RecentFilesBoost(t *testing.T) {
|
|
brainDir := t.TempDir()
|
|
// Both notes BM25-match the project name, but azure-tiger-z has
|
|
// twice the term frequency so it naturally ranks above azure-tiger-a.
|
|
// The recent_files boost on azure-tiger-a should pull it level on
|
|
// score; the alphabetical slug tiebreaker (a < z) then promotes it
|
|
// to the top — exercising both the boost and the deterministic
|
|
// tiebreak.
|
|
writeNote(t, brainDir, "wiki/finance/decisions/azure-tiger-a.md",
|
|
"A", "azure-tiger note about a.")
|
|
writeNote(t, brainDir, "wiki/finance/decisions/azure-tiger-z.md",
|
|
"Z", "azure-tiger azure-tiger note about z.")
|
|
|
|
s := NewServer(brainDir, nil, nil, nil)
|
|
|
|
// Baseline ranking: azure-tiger-z must lead (higher term frequency).
|
|
baseline := callContext(t, s, map[string]any{
|
|
"project_root": "/home/mathias/dev/QKX/azure-tiger",
|
|
})
|
|
baselineEntries := baseline["entries"].([]any)
|
|
require.GreaterOrEqual(t, len(baselineEntries), 2)
|
|
baselineTop := baselineEntries[0].(map[string]any)
|
|
require.Equal(t, "azure-tiger-z", baselineTop["slug"],
|
|
"sanity: higher tf must rank first without a boost")
|
|
|
|
// With boost on azure-tiger-a — boosted entry must now lead.
|
|
boosted := callContext(t, s, map[string]any{
|
|
"project_root": "/home/mathias/dev/QKX/azure-tiger",
|
|
"recent_files": []string{"/some/where/azure-tiger-a.md"},
|
|
})
|
|
entries := boosted["entries"].([]any)
|
|
require.GreaterOrEqual(t, len(entries), 2)
|
|
top := entries[0].(map[string]any)
|
|
assert.Equal(t, "azure-tiger-a", top["slug"], "recent_files boost must promote the matching doc")
|
|
}
|