hyperguild/ingestion/internal/mcp/server.go

// Package mcp implements an MCP HTTP handler for the ingestion service.
// Exposed tools: brain_query, brain_write, brain_index, brain_tunnel,
// brain_ingest, brain_ingest_raw, brain_answer, brain_classify, session_log.
package mcp

import (
	"context"
	"encoding/json"
	"fmt"
	"net/http"

	"github.com/mathiasbq/hyperguild/ingestion/internal/pipeline"
	"github.com/mathiasbq/hyperguild/ingestion/internal/reranker"
	"github.com/mathiasbq/hyperguild/ingestion/internal/search"
)

// request is the JSON-RPC 2.0 envelope decoded from the body of a non-GET
// HTTP request.
//
// ID is decoded into an untyped value; it is nil when the "id" member is
// absent or JSON null, which ServeHTTP treats as a notification. Params is
// kept as raw JSON so each method can decode it into its own shape.
type request struct {
	JSONRPC string          `json:"jsonrpc"`
	ID      any             `json:"id"`
	Method  string          `json:"method"`
	Params  json.RawMessage `json:"params"`
}

// response is the JSON-RPC 2.0 envelope written back to the client.
//
// ID is always serialised: JSON-RPC 2.0 makes the "id" member mandatory on
// every response and requires it to be null when the request id could not be
// determined (for example on a parse error), so it must not be dropped when
// nil. Result and Error are each omitted when unset, so a response carries
// only the one that was populated.
type response struct {
	JSONRPC string    `json:"jsonrpc"`
	ID      any       `json:"id"`
	Result  any       `json:"result,omitempty"`
	Error   *rpcError `json:"error,omitempty"`
}

// rpcError is the JSON-RPC 2.0 error object carried in response.Error.
// Code is the numeric error code and Message a short human-readable
// description.
type rpcError struct {
	Code    int    `json:"code"`
	Message string `json:"message"`
}

// Server handles MCP JSON-RPC over HTTP for the ingestion service.
//
// brainDir, pipeline, llm and answerLLM are set by NewServer; pipeline holds
// a copy of the caller's config (the zero value when none was supplied).
// reranker, vector and embedder are optional and are installed afterwards via
// WithReranker and WithHybridRetrieval; their nil meaning is noted per field.
type Server struct {
	brainDir  string
	pipeline  pipeline.Config
	llm       pipeline.CompleteFunc
	answerLLM pipeline.CompleteFunc // nil = brain_answer and brain_classify unavailable
	reranker  *reranker.Client      // nil = no rerank, BM25 top-10 → LLM
	vector    search.VectorSearcher // nil = BM25-only retrieval
	embedder  search.Embedder       // nil = BM25-only retrieval
}

// NewServer returns a Server rooted at brainDir.
//
// pipelineCfg is copied by value into the server, so later changes to the
// caller's struct are not observed; a nil pipelineCfg leaves the server with
// a zero-value pipeline.Config. llm may be nil when only non-LLM tools are
// used. answerLLM backs brain_answer and brain_classify; passing nil
// disables those tools.
func NewServer(brainDir string, pipelineCfg *pipeline.Config, llm pipeline.CompleteFunc, answerLLM pipeline.CompleteFunc) *Server {
	srv := &Server{
		brainDir:  brainDir,
		llm:       llm,
		answerLLM: answerLLM,
	}
	if pipelineCfg != nil {
		srv.pipeline = *pipelineCfg
	}
	return srv
}

// WithReranker installs an opt-in cross-encoder reranker by storing r on the
// server, replacing any previously installed one. When set, brain_answer
// retrieves a wider BM25 candidate set and prunes it to the relevant ones
// before LLM synthesis (that logic lives in the brain_answer handler, not
// here). Passing nil leaves reranking disabled.
//
// It mutates s in place and returns the same server for fluent chaining.
func (s *Server) WithReranker(r *reranker.Client) *Server {
	s.reranker = r
	return s
}

// WithHybridRetrieval attaches the vector store v and the embedder e to the
// server so that brain_query and brain_answer can combine BM25 with pgvector
// results (merged via RRF) rather than relying on BM25 alone. Hybrid mode
// needs both: leaving either one nil keeps retrieval BM25-only.
//
// The receiver is modified in place and returned to allow call chaining.
func (s *Server) WithHybridRetrieval(v search.VectorSearcher, e search.Embedder) *Server {
	s.vector, s.embedder = v, e
	return s
}

// ServeHTTP implements http.Handler for the MCP endpoint.
//
// GET requests open a server-sent-events stream: the SSE headers are sent
// and flushed, then the connection is held open until the request context is
// cancelled. This handler writes no events to that stream.
//
// Any other request is treated as a single JSON-RPC 2.0 message in the body:
//   - a body that fails to decode is answered with a -32700 parse error;
//   - a message without an id (a notification) is acknowledged with
//     202 Accepted and no body;
//   - "initialize", "tools/list" and "tools/call" are dispatched, and any
//     other method yields -32601.
//
// Tool failures are reported as JSON-RPC error -32000 carrying the error
// text; undecodable tools/call params are reported as -32602.
func (s *Server) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	// MCP streamable HTTP: GET establishes the SSE stream for server-to-client events.
	if r.Method == http.MethodGet {
		w.Header().Set("Content-Type", "text/event-stream")
		w.Header().Set("Cache-Control", "no-cache")
		w.Header().Set("Connection", "keep-alive")
		// Ask buffering reverse proxies (nginx) to pass bytes through immediately.
		w.Header().Set("X-Accel-Buffering", "no")
		w.WriteHeader(http.StatusOK)
		if f, ok := w.(http.Flusher); ok {
			f.Flush()
		}
		// Keep the stream open until the client disconnects or the server shuts down.
		<-r.Context().Done()
		return
	}

	var req request
	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
		writeError(w, nil, -32700, "parse error")
		return
	}

	// JSON-RPC 2.0 notifications (no id) must not receive a response body.
	// Acknowledge them explicitly with 202 Accepted, as the MCP streamable
	// HTTP transport requires, instead of falling through to an implicit
	// 200 with an empty, untyped body that clients may reject.
	if req.ID == nil {
		w.WriteHeader(http.StatusAccepted)
		return
	}

	var result any
	var rpcErr *rpcError

	switch req.Method {
	case "initialize":
		result = map[string]any{
			"protocolVersion": "2024-11-05",
			"capabilities":    map[string]any{"tools": map[string]any{}},
			"serverInfo":      map[string]any{"name": "ingestion-brain", "version": "0.1.0"},
		}

	case "tools/list":
		result = map[string]any{"tools": s.tools()}

	case "tools/call":
		var p struct {
			Name      string          `json:"name"`
			Arguments json.RawMessage `json:"arguments"`
		}
		if err := json.Unmarshal(req.Params, &p); err != nil {
			rpcErr = &rpcError{Code: -32602, Message: "invalid params"}
			break
		}
		out, err := s.handleCall(r.Context(), p.Name, p.Arguments)
		if err != nil {
			rpcErr = &rpcError{Code: -32000, Message: err.Error()}
			break
		}
		// The tool's raw JSON output is returned as a single text content item.
		result = map[string]any{
			"content": []map[string]any{{"type": "text", "text": string(out)}},
		}

	default:
		rpcErr = &rpcError{Code: -32601, Message: "method not found: " + req.Method}
	}

	w.Header().Set("Content-Type", "application/json")
	// Best effort: an encode failure here means the client went away or the
	// response is already partially written, so there is nothing left to report.
	_ = json.NewEncoder(w).Encode(response{
		JSONRPC: "2.0",
		ID:      req.ID,
		Result:  result,
		Error:   rpcErr,
	})
}

// writeError sends a JSON-RPC 2.0 error response for request id with the
// given error code and message. It sets the JSON content type and leaves the
// HTTP status at its default. A failure to encode or write the body is
// deliberately ignored: by then there is no channel left to report it on.
func writeError(w http.ResponseWriter, id any, code int, msg string) {
	resp := response{
		JSONRPC: "2.0",
		ID:      id,
		Error: &rpcError{
			Code:    code,
			Message: msg,
		},
	}

	w.Header().Set("Content-Type", "application/json")
	_ = json.NewEncoder(w).Encode(resp)
}

// handleCall routes a tools/call request to the handler registered for the
// tool called name, passing ctx and the raw JSON args through unchanged, and
// returns that handler's result and error as-is. A name that matches no
// known tool produces an "unknown tool" error.
func (s *Server) handleCall(ctx context.Context, name string, args json.RawMessage) (json.RawMessage, error) {
	var handler func(context.Context, json.RawMessage) (json.RawMessage, error)

	// Pick the handler first, then invoke it from a single call site.
	switch name {
	case "brain_query":
		handler = s.brainQuery
	case "brain_write":
		handler = s.brainWrite
	case "brain_index":
		handler = s.brainIndex
	case "brain_tunnel":
		handler = s.brainTunnel
	case "brain_ingest_raw":
		handler = s.brainIngestRaw
	case "brain_ingest":
		handler = s.brainIngest
	case "session_log":
		handler = s.sessionLog
	case "brain_answer":
		handler = s.brainAnswer
	case "brain_classify":
		handler = s.brainClassify
	default:
		return nil, fmt.Errorf("unknown tool: %s", name)
	}

	return handler(ctx, args)
}