Files
Mathias Bergqvist 0a70d9e972
All checks were successful
CI / Lint / Test / Vet (push) Successful in 9s
CI / Mirror to GitHub (push) Has been skipped
feat(pipeline): add POST /ingest-raw for direct batch ingestion without LLM
Allows callers to provide pre-structured RawPage data directly, bypassing the
LLM extraction step. The pipeline still handles slug computation, frontmatter,
link canonicalization, source back-references, and dedup — only the extraction
is skipped. Useful when a more capable model or manual curation produces the
structured data.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-24 11:15:59 +02:00

112 lines
4.8 KiB
Go

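// internal/skills/brain/skill.go

// Package brain implements the "brain" skill and the MCP tool definitions
// (brain_query, brain_write, brain_ingest_raw, brain_ingest, brain_search) it exposes.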
package brain
import (
"encoding/json"
"github.com/mathiasbq/supervisor/internal/registry"
)
// Config holds brain skill configuration: the base URLs of the servers that
// back the brain tools. Tools only lists the ingest and search tools when the
// matching URL below is non-empty.
type Config struct {
	// IngestBaseURL is the base URL of the ingestion HTTP server
	// (brain_query, brain_write).
	IngestBaseURL string
	// IngestSvcURL is the base URL of the ingestion-svc HTTP server
	// (brain_ingest_raw, brain_ingest). Empty means those tools are not listed.
	IngestSvcURL string
	// KBRetrievalURL is the base URL of the kb-retrieval server (brain_search).
	// Empty means brain_search is not listed.
	KBRetrievalURL string
}
// Skill implements registry.Skill for the brain tools: brain_query and
// brain_write, plus brain_ingest_raw, brain_ingest, and brain_search when the
// corresponding service URLs are set in its Config.
type Skill struct {
	cfg Config // configuration supplied to New; Tools reads it to pick the optional tools
}
// New returns a brain Skill that keeps a copy of cfg as its configuration.
func New(cfg Config) *Skill {
	skill := new(Skill)
	skill.cfg = cfg
	return skill
}
// Name reports the identifier under which this skill is routed.
func (s *Skill) Name() string {
	const skillName = "brain"
	return skillName
}
// Tools lists the MCP tool definitions this skill exposes.
//
// brain_query and brain_write are always included. brain_ingest_raw and
// brain_ingest are appended (in that order) only when cfg.IngestSvcURL is
// non-empty, and brain_search only when cfg.KBRetrievalURL is non-empty.
func (s *Skill) Tools() []registry.ToolDef {
	// objectSchema renders a JSON Schema object node with the given required
	// keys and property definitions.
	objectSchema := func(required []string, props map[string]any) json.RawMessage {
		node := map[string]any{
			"type":       "object",
			"required":   required,
			"properties": props,
		}
		// The marshal error is deliberately dropped: every value passed in by
		// this function is a string, a string slice, or a nested map of those.
		raw, _ := json.Marshal(node)
		return raw
	}
	// typed builds a bare {"type": kind} property.
	typed := func(kind string) map[string]any {
		return map[string]any{"type": kind}
	}
	// described builds a {"type": kind, "description": text} property.
	described := func(kind, text string) map[string]any {
		return map[string]any{"type": kind, "description": text}
	}

	defs := []registry.ToolDef{
		{
			Name:        "brain_query",
			Description: "BM25 full-text search across brain/knowledge/ and brain/wiki/ markdown files. Fast, no embeddings needed. Call before any significant task.",
			InputSchema: objectSchema([]string{"query"}, map[string]any{
				"query": typed("string"),
				"limit": typed("integer"),
			}),
		},
		{
			Name:        "brain_write",
			Description: "Write a raw knowledge note to brain/knowledge/ for later ingestion.",
			InputSchema: objectSchema([]string{"content"}, map[string]any{
				"content":  typed("string"),
				"type":     typed("string"),
				"domain":   typed("string"),
				"filename": typed("string"),
			}),
		},
	}

	if s.cfg.IngestSvcURL != "" {
		// Schema of a single element of brain_ingest_raw's "pages" array.
		pageSchema := map[string]any{
			"type":     "object",
			"required": []string{"title", "type", "content"},
			"properties": map[string]any{
				"title": described("string", "page title, e.g. 'Hash Encoding'"),
				"type": map[string]any{
					"type":        "string",
					"enum":        []string{"source", "concept", "entity"},
					"description": "page type",
				},
				"subtype": described("string", "entity: person|company|tool|model|framework|technology; source: article|pdf|book|video|note|project"),
				"domain":  described("string", "knowledge domain, e.g. 'Machine Learning'"),
				"content": described("string", "markdown body — no frontmatter, use [[Display Name]] for wikilinks"),
			},
		}

		defs = append(defs,
			registry.ToolDef{
				Name: "brain_ingest_raw",
				Description: "Ingest pre-structured pages into the brain wiki, bypassing the LLM extraction step. " +
					"Use when you (the calling agent) have already extracted entities, concepts, and content from a source. " +
					"Provide source (human-readable name) and pages (array of {title, type, subtype, domain, content} objects). " +
					"The pipeline computes slugs, paths, frontmatter, wikilink canonicalization, and source back-references. " +
					"Returns the list of wiki pages written.",
				InputSchema: objectSchema([]string{"source", "pages"}, map[string]any{
					"source": described("string", "human-readable name for the source, e.g. 'shape-up-book'"),
					"pages": map[string]any{
						"type":  "array",
						"items": pageSchema,
					},
					"dry_run": typed("boolean"),
				}),
			},
			registry.ToolDef{
				Name: "brain_ingest",
				Description: "Ingest content into the brain wiki (brain/wiki/). Calls an LLM to produce structured wiki pages. " +
					"Use for substantial documents, articles, or knowledge worth structuring. " +
					"Provide EITHER (a) path — absolute path to a file or directory, " +
					"OR (b) content + source — raw text and a human-readable name. " +
					"Providing both is an error. Returns the list of wiki pages written.",
				// No key is unconditionally required: the path / content+source
				// alternatives are spelled out in the descriptions instead.
				InputSchema: objectSchema([]string{}, map[string]any{
					"content": described("string", "raw text to ingest; required when path is not set"),
					"source":  described("string", "human-readable name for the content, e.g. 'shape-up-book'; required when path is not set"),
					"path":    described("string", "absolute path to a file or directory to ingest; mutually exclusive with content+source"),
					"dry_run": typed("boolean"),
				}),
			},
		)
	}

	if s.cfg.KBRetrievalURL != "" {
		defs = append(defs, registry.ToolDef{
			Name:        "brain_search",
			Description: "Semantic vector search across the brain wiki using embeddings. Use when brain_query returns no results or you need conceptually-related results rather than keyword matches.",
			InputSchema: objectSchema([]string{"query"}, map[string]any{
				"query":      typed("string"),
				"collection": typed("string"),
				"limit":      typed("integer"),
			}),
		})
	}

	return defs
}