hyperguild/ingestion/internal/mcp/tools_context.go

package mcp

import (
	"context"
	"encoding/json"
	"fmt"
	"os"
	"path/filepath"
	"sort"
	"strings"

	"github.com/mathiasbq/hyperguild/ingestion/internal/search"
)

// brainContextArgs is the JSON input shape of the brain_context tool.
// Only project_root is mandatory; the remaining fields tune ranking and
// result size.
type brainContextArgs struct {
	// ProjectRoot is the path of the project to gather context for.
	// brainContext rejects an empty value and uses the path's basename
	// as the search query.
	ProjectRoot string   `json:"project_root"`
	// RecentFiles optionally biases ranking: an entry whose doc_path
	// basename equals the basename of any listed file gets a score boost.
	RecentFiles []string `json:"recent_files,omitempty"`
	// Limit caps the number of returned entries. Zero or a negative value
	// falls back to the default of 10.
	Limit       int      `json:"limit,omitempty"`
}

// contextEntry is one brain entry returned by brain_context. Entries are
// keyed and deduplicated by Slug, ranked by Score, and then truncated to
// the requested limit.
type contextEntry struct {
	// Slug identifies the entry; it is the dedup key when merging sources.
	Slug     string  `json:"slug"`
	// Title is the entry title as reported by the search hit or graph node.
	Title    string  `json:"title"`
	// DocPath is the document path reported by the search hit or graph
	// node. Its basename is what the recent_files boost compares against.
	DocPath  string  `json:"doc_path"`
	// Excerpt is a short body preview, clamped to 200 by brainContext.
	Excerpt  string  `json:"excerpt"`
	// EdgeType records how the entry was first surfaced: "bm25" for a
	// direct search hit, "graph" for a neighbour found by graph expansion.
	EdgeType string  `json:"edge_type"`
	// Score is the accumulated ranking score (search rank, graph
	// proximity, and recent_files boost); higher sorts first.
	Score    float64 `json:"score"`
}

// brainContext serves the brain_context tool: it collects brain entries
// relevant to the project named by project_root and returns the
// highest-scoring ones as a JSON object of the form {"entries": [...]}.
//
// The pipeline is:
//  1. Search the brain for the project_root basename, keeping at most
//     three hits. Each hit becomes a "bm25" entry scored by rank, with the
//     first hit scoring len(hits) and the last scoring 1.
//  2. If s.graph implements graphReader, fetch the 2-hop subgraph around
//     every hit. A neighbour scores 0.6 divided by its distance (clamped
//     to at least 1). New neighbours become "graph" entries; neighbours
//     already present have the graph score added to their total.
//  3. Add 1.0 to every entry whose doc_path basename matches the basename
//     of any recent_files item.
//  4. Sort by score descending, breaking ties by slug ascending, and keep
//     the first limit entries (default 10).
//
// It returns an error for unparseable args, an empty project_root, a
// project_root without a usable basename, or a failed search. Graph
// lookup failures are ignored because graph expansion is best-effort.
func (s *Server) brainContext(ctx context.Context, args json.RawMessage) (json.RawMessage, error) {
	var in brainContextArgs
	if err := json.Unmarshal(args, &in); err != nil {
		return nil, fmt.Errorf("parse args: %w", err)
	}
	if in.ProjectRoot == "" {
		return nil, fmt.Errorf("project_root is required")
	}

	maxEntries := in.Limit
	if maxEntries <= 0 {
		maxEntries = 10
	}

	// The project's directory name doubles as the search query.
	name := filepath.Base(strings.TrimRight(in.ProjectRoot, "/"))
	switch name {
	case "", ".", "/":
		return nil, fmt.Errorf("project_root has no usable basename: %q", in.ProjectRoot)
	}

	// The top three search hits are both results and graph seeds.
	hits, err := search.QueryContext(ctx, s.brainDir, search.QueryOptions{
		Query:    name,
		Limit:    3,
		Vector:   s.vector,
		Embedder: s.embedder,
	})
	if err != nil {
		return nil, fmt.Errorf("search: %w", err)
	}

	// merged holds every candidate keyed by slug so that search hits and
	// graph neighbours collapse into a single entry.
	merged := make(map[string]*contextEntry)
	for rank, hit := range hits {
		slug := slugFromPath(hit.Path)
		if slug == "" {
			continue
		}
		merged[slug] = &contextEntry{
			Slug:     slug,
			Title:    hit.Title,
			DocPath:  hit.Path,
			Excerpt:  truncateExcerpt(hit.Excerpt, 200),
			EdgeType: "bm25",
			// Linear decay by rank: the first hit scores highest.
			Score: float64(len(hits) - rank),
		}
	}

	// Best-effort graph expansion around each hit; a seed whose subgraph
	// lookup fails is skipped rather than failing the whole call.
	if g, isReader := s.graph.(graphReader); isReader {
		for _, hit := range hits {
			seed := slugFromPath(hit.Path)
			if seed == "" {
				continue
			}
			neighbours, subErr := g.Subgraph(ctx, seed, 2)
			if subErr != nil {
				continue
			}
			for _, nb := range neighbours {
				if nb.Slug == "" || nb.Slug == seed {
					continue
				}
				// Nearer neighbours matter more: 0.6 at one hop, 0.3 at two.
				weight := 0.6 / float64(max1(nb.Distance))
				if prior, seen := merged[nb.Slug]; seen {
					// Corroborating evidence lifts an entry that is
					// already in the candidate set.
					prior.Score += weight
				} else {
					merged[nb.Slug] = &contextEntry{
						Slug:     nb.Slug,
						Title:    nb.Title,
						DocPath:  nb.DocPath,
						Excerpt:  readExcerpt(s.brainDir, nb.DocPath, 200),
						EdgeType: "graph",
						Score:    weight,
					}
				}
			}
		}
	}

	// recent_files boost: a deliberately simple basename match worth +1.
	if len(in.RecentFiles) > 0 {
		recentNames := make(map[string]struct{}, len(in.RecentFiles))
		for _, path := range in.RecentFiles {
			recentNames[filepath.Base(path)] = struct{}{}
		}
		for _, entry := range merged {
			if _, touched := recentNames[filepath.Base(entry.DocPath)]; touched {
				entry.Score += 1.0
			}
		}
	}

	// Map iteration order is random, so the sort uses slug as a tiebreaker
	// to keep the output deterministic.
	ranked := make([]contextEntry, 0, len(merged))
	for _, entry := range merged {
		ranked = append(ranked, *entry)
	}
	sort.SliceStable(ranked, func(x, y int) bool {
		left, right := ranked[x], ranked[y]
		if left.Score == right.Score {
			return left.Slug < right.Slug
		}
		return left.Score > right.Score
	})
	if len(ranked) > maxEntries {
		ranked = ranked[:maxEntries]
	}

	return json.Marshal(map[string]any{"entries": ranked})
}

// truncateExcerpt clamps an already-stripped excerpt to at most maxLen
// bytes without re-running the frontmatter parser, appending "…" whenever
// something was cut off. The cut is moved back to the nearest rune
// boundary so a multi-byte UTF-8 sequence is never split; for ASCII input
// the result is exactly the first maxLen bytes. A negative maxLen is
// treated as 0. The ellipsis suffix is intended to match the convention
// used in search.excerpt.
func truncateExcerpt(s string, maxLen int) string {
	if maxLen < 0 {
		maxLen = 0
	}
	if len(s) <= maxLen {
		return s
	}
	// Ranging over a string yields the byte offset of each rune start, so
	// the last offset not exceeding maxLen is the largest safe cut point.
	cut := 0
	for i := range s {
		if i > maxLen {
			break
		}
		cut = i
	}
	return s[:cut] + "…"
}

// stripFrontmatter removes a leading frontmatter block from doc and
// returns the whitespace-trimmed remainder. A block is recognised only
// when the first non-blank line is exactly "---" and a later line is
// exactly "---" (trailing spaces, tabs, or CR are tolerated on both). Any
// other document, including one with an unterminated block or with "---"
// appearing only inside the body, is returned unchanged.
func stripFrontmatter(doc string) string {
	head, rest, ok := strings.Cut(strings.TrimLeft(doc, "\ufeff \t\r\n"), "\n")
	if !ok || strings.TrimRight(head, " \t\r") != "---" {
		return doc
	}
	for rest != "" {
		line, tail, _ := strings.Cut(rest, "\n")
		if strings.TrimRight(line, " \t\r") == "---" {
			return strings.TrimSpace(tail)
		}
		rest = tail
	}
	// The opening delimiter was never closed: treat it as body text.
	return doc
}

// readExcerpt loads the doc at relPath (slash-separated, relative to
// brainDir), strips its leading frontmatter, and returns the body clamped
// to maxLen by truncateExcerpt. It returns "" when relPath is empty or the
// file cannot be read — the excerpt is informational, not load-bearing
// for correctness.
func readExcerpt(brainDir, relPath string, maxLen int) string {
	if relPath == "" {
		return ""
	}
	content, err := os.ReadFile(filepath.Join(brainDir, filepath.FromSlash(relPath)))
	if err != nil {
		return ""
	}
	return truncateExcerpt(stripFrontmatter(string(content)), maxLen)
}

// max1 clamps n to a lower bound of 1. Callers use it as a divisor for
// graph distance, so a distance of 0 (a self-reference) or a negative
// value yields a finite score instead of a division by zero.
func max1(n int) int {
	if n >= 1 {
		return n
	}
	return 1
}