// hyperguild/ingestion/internal/mcp/tools_answer.go

package mcp

import (
	"context"
	"encoding/json"
	"fmt"
	"strings"
	"unicode/utf8"

	"github.com/mathiasbq/hyperguild/ingestion/internal/reranker"
	"github.com/mathiasbq/hyperguild/ingestion/internal/search"
)

// rerankResults scores each candidate's excerpt against the query and
// returns up to top results whose score is positive, preserving the
// caller's input order (BM25 rank) within the kept set. The reranker is
// a filter: ties are broken by BM25, not by the reranker's binary score.
//
// When top <= 0 or results is empty there is nothing to keep, so an empty
// (non-nil) slice is returned without calling the reranker. An error is
// returned when scoring fails or when the reranker does not return exactly
// one score per candidate.
func rerankResults(ctx context.Context, rr *reranker.Client, query string, results []search.Result, top int) ([]search.Result, error) {
	// Nothing can be kept: skip the scoring round-trip. This also keeps a
	// non-positive top away from make() and from the len(kept) == top
	// check below, which would never fire once a hit was kept with top == 0.
	if top <= 0 || len(results) == 0 {
		return []search.Result{}, nil
	}

	docs := make([]string, len(results))
	for i, r := range results {
		docs[i] = r.Excerpt
	}
	scores, err := rr.Score(ctx, query, docs)
	if err != nil {
		return nil, err
	}
	// scores is indexed in lock-step with results; a short reply would
	// otherwise panic with an out-of-range index.
	if len(scores) != len(results) {
		return nil, fmt.Errorf("reranker returned %d scores for %d documents", len(scores), len(results))
	}

	// Never reserve more than we could possibly keep.
	capHint := top
	if len(results) < capHint {
		capHint = len(results)
	}
	kept := make([]search.Result, 0, capHint)
	for i, r := range results {
		if scores[i] > 0 {
			kept = append(kept, r)
			if len(kept) == top {
				break
			}
		}
	}
	return kept, nil
}

// System prompts handed to the answer LLM by the tools in this file.
const (
	// answerSystemPrompt is the system prompt brainAnswer sends along with
	// the retrieved sources and the user's question. It restricts the model
	// to the supplied sources and asks for inline path citations.
	answerSystemPrompt = `You are a knowledge assistant. Answer the question using ONLY the provided sources.
Cite source file paths inline when referencing specific content.
If the context does not contain enough information to answer, say so clearly.`

	// classifySystemPrompt is the system prompt brainClassify sends with the
	// document text. It asks for a bare JSON object with type, title and
	// tags, and enumerates the accepted type values.
	classifySystemPrompt = `Classify the document. Respond with JSON only, no markdown fences.
{"type":"...","title":"...","tags":["..."]}
Valid types: spec, plan, decision, note, wiki, log, code, unknown.`
)

// brainAnswerArgs is the JSON argument payload decoded by brainAnswer.
type brainAnswerArgs struct {
	// Query is the question to answer; brainAnswer rejects an empty value.
	Query string `json:"query"`
}

// brainAnswer answers a question from the brain: it retrieves candidate
// documents for the query, optionally filters them through the reranker,
// builds a prompt out of the surviving excerpts (plus graph neighbours of
// the top hit when a graph reader is available) and asks the answer LLM.
//
// The result is a JSON object with an "answer" string and a "sources"
// array of document paths. When retrieval yields nothing, a fixed
// "no relevant content" answer with empty sources is returned and the
// LLM is not called. Errors are returned when the LLM is not configured,
// the arguments are malformed or lack a query, or search, reranking or
// the LLM call fails.
func (s *Server) brainAnswer(ctx context.Context, args json.RawMessage) (json.RawMessage, error) {
	if s.answerLLM == nil {
		return nil, fmt.Errorf("answer LLM not configured: set BRAIN_LLM_PRIMARY_URL")
	}

	var req brainAnswerArgs
	if err := json.Unmarshal(args, &req); err != nil {
		return nil, fmt.Errorf("parse args: %w", err)
	}
	if req.Query == "" {
		return nil, fmt.Errorf("query is required")
	}

	// Retrieval budget: 10 hits go straight to the LLM when there is no
	// reranker; with one, fetch 20 and let the cross-encoder cut to 5.
	useReranker := s.reranker != nil
	limit := 10
	if useReranker {
		limit = 20
	}

	hits, err := search.QueryContext(ctx, s.brainDir, search.QueryOptions{
		Query:    req.Query,
		Limit:    limit,
		Vector:   s.vector,
		Embedder: s.embedder,
	})
	if err != nil {
		return nil, fmt.Errorf("search: %w", err)
	}
	if useReranker && len(hits) > 0 {
		if hits, err = rerankResults(ctx, s.reranker, req.Query, hits, 5); err != nil {
			return nil, fmt.Errorf("rerank: %w", err)
		}
	}
	if len(hits) == 0 {
		return json.Marshal(map[string]any{
			"answer":  "No relevant content found in brain.",
			"sources": []string{},
		})
	}

	// One <source> element per surviving hit; paths are collected in the
	// same order for the response.
	var prompt strings.Builder
	paths := make([]string, len(hits))
	for i, h := range hits {
		fmt.Fprintf(&prompt, "<source path=%q>\n%s\n</source>\n\n", h.Path, h.Excerpt)
		paths[i] = h.Path
	}

	// GraphRAG augmentation: if the graph can be read, list the 1-hop
	// neighbourhood of the best hit in a <related> block so the model
	// gets nearby documents without a second retrieval pass. This is
	// best-effort: a missing slug, a graph error or an empty
	// neighbourhood simply leaves the prompt unchanged.
	if reader, ok := s.graph.(graphReader); ok {
		if slug := slugFromPath(hits[0].Path); slug != "" {
			neighbours, gerr := reader.Subgraph(ctx, slug, 1)
			if gerr == nil && len(neighbours) > 0 {
				prompt.WriteString("<related>\n")
				for _, nb := range neighbours {
					// Prefer the human-readable title; fall back to the slug.
					name := nb.Slug
					if nb.Title != "" {
						name = nb.Title
					}
					fmt.Fprintf(&prompt, "- %s (%s) at %s\n", name, nb.EdgeType, nb.DocPath)
				}
				prompt.WriteString("</related>\n\n")
			}
		}
	}

	answer, err := s.answerLLM(ctx, answerSystemPrompt, prompt.String()+"Question: "+req.Query)
	if err != nil {
		return nil, fmt.Errorf("llm: %w", err)
	}

	return json.Marshal(map[string]any{
		"answer":  answer,
		"sources": paths,
	})
}

// slugFromPath derives a document slug from a slash-separated path by
// taking the final path element and dropping its ".md" extension, e.g.
// "wiki/concepts/foo.md" yields "foo".
// It yields "" when the final element does not end in ".md", and also
// when nothing is left after the extension is removed.
func slugFromPath(path string) string {
	// LastIndexByte is -1 when there is no '/', so +1 keeps the whole string.
	base := path[strings.LastIndexByte(path, '/')+1:]
	if !strings.HasSuffix(base, ".md") {
		return ""
	}
	return base[:len(base)-len(".md")]
}

// brainClassifyArgs is the JSON argument payload decoded by brainClassify.
type brainClassifyArgs struct {
	// Text is the document content to classify; brainClassify rejects an
	// empty value.
	Text string `json:"text"`
}

// classifyResult is the classification object brainClassify decodes from
// the LLM reply and re-encodes as its own JSON result.
type classifyResult struct {
	Type  string   `json:"type"`  // document type reported by the model
	Title string   `json:"title"` // document title reported by the model
	Tags  []string `json:"tags"`  // brainClassify replaces a nil value with an empty slice
}

// brainClassify asks the answer LLM to classify a document and returns the
// classification as a JSON object with "type", "title" and "tags" fields.
//
// args must decode into brainClassifyArgs with a non-empty "text". Only a
// prefix of at most 3000 bytes is sent to the model; the cut is moved back
// to a rune boundary so a multi-byte UTF-8 character is never split in
// half. "tags" is always encoded as an array, never null.
//
// An error is returned when the LLM is not configured, the arguments are
// malformed or lack text, the LLM call fails, or the reply cannot be
// parsed as JSON.
func (s *Server) brainClassify(ctx context.Context, args json.RawMessage) (json.RawMessage, error) {
	// Upper bound, in bytes, on the document prefix sent for classification.
	const maxClassifyBytes = 3000

	if s.answerLLM == nil {
		return nil, fmt.Errorf("answer LLM not configured: set BRAIN_LLM_PRIMARY_URL")
	}
	var a brainClassifyArgs
	if err := json.Unmarshal(args, &a); err != nil {
		return nil, fmt.Errorf("parse args: %w", err)
	}
	if a.Text == "" {
		return nil, fmt.Errorf("text is required")
	}

	text := a.Text
	if len(text) > maxClassifyBytes {
		// Slicing at a fixed byte offset can land inside a multi-byte
		// rune and hand the model invalid UTF-8. Step back until the byte
		// at the cut starts a rune, so text[:cut] ends on a boundary.
		cut := maxClassifyBytes
		for cut > 0 && !utf8.RuneStart(text[cut]) {
			cut--
		}
		text = text[:cut]
	}

	raw, err := s.answerLLM(ctx, classifySystemPrompt, text)
	if err != nil {
		return nil, fmt.Errorf("llm: %w", err)
	}

	// Strip markdown fences if model adds them despite the instruction.
	raw = strings.TrimSpace(raw)
	raw = strings.TrimPrefix(raw, "```json")
	raw = strings.TrimPrefix(raw, "```")
	raw = strings.TrimSuffix(raw, "```")
	raw = strings.TrimSpace(raw)

	var cr classifyResult
	if err := json.Unmarshal([]byte(raw), &cr); err != nil {
		return nil, fmt.Errorf("parse classify response %q: %w", raw, err)
	}
	// A nil slice would marshal as null; callers get [] instead.
	if cr.Tags == nil {
		cr.Tags = []string{}
	}
	return json.Marshal(cr)
}