diff --git a/ingestion/cmd/server/main.go b/ingestion/cmd/server/main.go index 65bf27e..72fbf31 100644 --- a/ingestion/cmd/server/main.go +++ b/ingestion/cmd/server/main.go @@ -94,6 +94,7 @@ func main() { mux := http.NewServeMux() mux.HandleFunc("POST /query", h.Query) mux.HandleFunc("POST /write", h.Write) + mux.HandleFunc("POST /index", h.Index) mux.HandleFunc("POST /ingest", h.Ingest) mux.HandleFunc("POST /ingest-path", h.IngestPath) mux.HandleFunc("POST /ingest-raw", h.IngestRaw) diff --git a/ingestion/internal/api/handler.go b/ingestion/internal/api/handler.go index 90932f5..1789e5e 100644 --- a/ingestion/internal/api/handler.go +++ b/ingestion/internal/api/handler.go @@ -11,6 +11,7 @@ import ( "strings" "time" + "github.com/mathiasbq/hyperguild/ingestion/internal/brain" "github.com/mathiasbq/hyperguild/ingestion/internal/extract" "github.com/mathiasbq/hyperguild/ingestion/internal/pipeline" "github.com/mathiasbq/hyperguild/ingestion/internal/search" @@ -34,6 +35,8 @@ func NewHandler(brainDir string, logger *slog.Logger, pipelineCfg pipeline.Confi type queryRequest struct { Query string `json:"query"` Limit int `json:"limit,omitempty"` + Wing string `json:"wing,omitempty"` + Hall string `json:"hall,omitempty"` } type writeRequest struct { @@ -41,6 +44,8 @@ type writeRequest struct { Filename string `json:"filename,omitempty"` Type string `json:"type,omitempty"` Domain string `json:"domain,omitempty"` + Wing string `json:"wing,omitempty"` + Hall string `json:"hall,omitempty"` } type ingestRequest struct { @@ -75,7 +80,12 @@ func (h *Handler) Query(w http.ResponseWriter, r *http.Request) { req.Limit = 5 } - results, err := search.Query(h.brainDir, req.Query, req.Limit) + results, err := search.Query(h.brainDir, search.QueryOptions{ + Query: req.Query, + Limit: req.Limit, + Wing: req.Wing, + Hall: req.Hall, + }) if err != nil { h.logger.Error("query failed", "err", err) writeError(w, http.StatusInternalServerError, "search error") @@ -85,13 +95,78 @@ func (h 
*Handler) Query(w http.ResponseWriter, r *http.Request) { writeJSON(w, map[string]any{"results": results}) } -// WriteNote writes a markdown file to brainDir/knowledge/, optionally -// prefixed with YAML frontmatter built from typ and domain. Returns the path +// WriteNoteOptions configures how a brain note is written. +// +// When both Wing and Hall are non-empty, the note routes into the +// structured wiki at brain/wiki///.md and gets +// wing/hall/created_at injected into its YAML frontmatter. +// +// When either is empty, the note falls back to brain/knowledge/ +// with optional type/domain frontmatter (legacy behaviour). +type WriteNoteOptions struct { + Content string + Filename string + Type string + Domain string + Wing string + Hall string +} + +// WriteNote writes a markdown note into the brain. Returns the path // relative to brainDir (forward-slashed). Filename traversal is rejected. -func WriteNote(brainDir, content, filename, typ, domain string) (string, error) { - if content == "" { +func WriteNote(brainDir string, opts WriteNoteOptions) (string, error) { + if opts.Content == "" { return "", fmt.Errorf("content is required") } + + if opts.Wing != "" && opts.Hall != "" { + return writeHallNote(brainDir, opts) + } + if opts.Wing != "" || opts.Hall != "" { + return "", fmt.Errorf("wing and hall must be set together") + } + return writeLegacyNote(brainDir, opts) +} + +// writeHallNote routes a note into brain/wiki/// and injects +// wing/hall/created_at frontmatter. 
+func writeHallNote(brainDir string, opts WriteNoteOptions) (string, error) { + slug := opts.Filename + if slug == "" { + slug = time.Now().UTC().Format("2006-01-02-150405") + "-auto" + } + dest, err := brain.NotePath(brainDir, opts.Wing, opts.Hall, slug) + if err != nil { + return "", err + } + if err := os.MkdirAll(filepath.Dir(dest), 0o755); err != nil { + return "", fmt.Errorf("create hall dir: %w", err) + } + + var fm strings.Builder + fm.WriteString("---\n") + fmt.Fprintf(&fm, "wing: %s\n", brain.Sanitise(opts.Wing)) + fmt.Fprintf(&fm, "hall: %s\n", opts.Hall) + fmt.Fprintf(&fm, "created_at: %s\n", time.Now().UTC().Format(time.RFC3339)) + if opts.Type != "" { + fmt.Fprintf(&fm, "type: %s\n", opts.Type) + } + if opts.Domain != "" { + fmt.Fprintf(&fm, "domain: %s\n", opts.Domain) + } + fm.WriteString("---\n") + + if err := os.WriteFile(dest, []byte(fm.String()+opts.Content), 0o644); err != nil { + return "", fmt.Errorf("write: %w", err) + } + rel, _ := filepath.Rel(brainDir, dest) + return filepath.ToSlash(rel), nil +} + +// writeLegacyNote preserves the original brain/knowledge/ behaviour for +// callers that have not adopted the wing/hall taxonomy. 
+func writeLegacyNote(brainDir string, opts WriteNoteOptions) (string, error) { + filename := opts.Filename if filename == "" { filename = fmt.Sprintf("%s-auto.md", time.Now().UTC().Format("2006-01-02-150405")) } @@ -101,26 +176,24 @@ func WriteNote(brainDir, content, filename, typ, domain string) (string, error) return "", fmt.Errorf("create raw dir: %w", err) } - finalContent := content - if typ != "" || domain != "" { + finalContent := opts.Content + if opts.Type != "" || opts.Domain != "" { var fm strings.Builder fm.WriteString("---\n") - if typ != "" { - fmt.Fprintf(&fm, "type: %s\n", typ) + if opts.Type != "" { + fmt.Fprintf(&fm, "type: %s\n", opts.Type) } - if domain != "" { - fmt.Fprintf(&fm, "domain: %s\n", domain) + if opts.Domain != "" { + fmt.Fprintf(&fm, "domain: %s\n", opts.Domain) } fm.WriteString("---\n") - finalContent = fm.String() + content + finalContent = fm.String() + opts.Content } - // Reject path separators outright; any non-flat filename is misuse. if strings.ContainsAny(filename, `/\`) { return "", fmt.Errorf("invalid filename") } base := filepath.Base(filename) - // After Base, "." and ".." remain. Reject those before adding .md. if base == "." || base == ".." 
|| base == "" { return "", fmt.Errorf("invalid filename") } @@ -143,15 +216,51 @@ func (h *Handler) Write(w http.ResponseWriter, r *http.Request) { writeError(w, http.StatusBadRequest, "invalid JSON") return } - relPath, err := WriteNote(h.brainDir, req.Content, req.Filename, req.Type, req.Domain) + relPath, err := WriteNote(h.brainDir, WriteNoteOptions(req)) if err != nil { h.logger.Error("write failed", "err", err) writeError(w, http.StatusBadRequest, err.Error()) return } + if req.Wing != "" && req.Hall != "" { + if err := brain.BuildWingIndex(h.brainDir, req.Wing); err != nil { + h.logger.Warn("auto-index failed", "wing", req.Wing, "err", err) + } + } writeJSON(w, map[string]string{"path": relPath}) } +type indexRequest struct { + Wing string `json:"wing,omitempty"` +} + +// Index handles POST /index — regenerate the _index.md MOC for one wing +// (when "wing" is set) or for every wing (when omitted). +func (h *Handler) Index(w http.ResponseWriter, r *http.Request) { + var req indexRequest + if r.ContentLength > 0 { + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + writeError(w, http.StatusBadRequest, "invalid JSON") + return + } + } + if req.Wing == "" { + if err := brain.BuildAllWingIndexes(h.brainDir); err != nil { + h.logger.Error("index all failed", "err", err) + writeError(w, http.StatusInternalServerError, "index error") + return + } + writeJSON(w, map[string]any{"status": "ok", "scope": "all"}) + return + } + if err := brain.BuildWingIndex(h.brainDir, req.Wing); err != nil { + h.logger.Error("index failed", "wing", req.Wing, "err", err) + writeError(w, http.StatusBadRequest, err.Error()) + return + } + writeJSON(w, map[string]any{"status": "ok", "scope": req.Wing}) +} + // Ingest handles POST /ingest — run the pipeline on provided content. 
func (h *Handler) Ingest(w http.ResponseWriter, r *http.Request) { var req ingestRequest diff --git a/ingestion/internal/brain/index.go b/ingestion/internal/brain/index.go new file mode 100644 index 0000000..7f67e7b --- /dev/null +++ b/ingestion/internal/brain/index.go @@ -0,0 +1,161 @@ +package brain + +import ( + "bufio" + "fmt" + "os" + "path/filepath" + "sort" + "strings" + "time" +) + +// noteEntry is one row in a Wing _index.md. +type noteEntry struct { + Hall string + Slug string + Title string + Created string +} + +// BuildWingIndex regenerates brain/wiki//_index.md as a Map of +// Content listing every note in that wing with its Hall and creation +// date. Returns nil if the wing directory does not exist. +func BuildWingIndex(brainDir, wing string) error { + w := Sanitise(wing) + if w == "" { + return fmt.Errorf("invalid wing %q", wing) + } + wingDir := filepath.Join(brainDir, "wiki", w) + if _, err := os.Stat(wingDir); os.IsNotExist(err) { + return nil + } else if err != nil { + return fmt.Errorf("stat wing: %w", err) + } + + entries, err := collectWingEntries(wingDir) + if err != nil { + return err + } + sort.Slice(entries, func(i, j int) bool { + if entries[i].Hall != entries[j].Hall { + return entries[i].Hall < entries[j].Hall + } + return entries[i].Slug < entries[j].Slug + }) + + var b strings.Builder + fmt.Fprintf(&b, "# %s\n\n", w) + b.WriteString("| Hall | Note | Created |\n") + b.WriteString("|------|------|---------|\n") + for _, e := range entries { + fmt.Fprintf(&b, "| %s | [%s](%s/%s.md) | %s |\n", e.Hall, e.Title, e.Hall, e.Slug, e.Created) + } + + dest := filepath.Join(wingDir, "_index.md") + return os.WriteFile(dest, []byte(b.String()), 0o644) +} + +// BuildAllWingIndexes regenerates _index.md for every wing under brain/wiki/. 
+func BuildAllWingIndexes(brainDir string) error { + wikiDir := filepath.Join(brainDir, "wiki") + ents, err := os.ReadDir(wikiDir) + if os.IsNotExist(err) { + return nil + } + if err != nil { + return fmt.Errorf("read wiki: %w", err) + } + for _, e := range ents { + if !e.IsDir() { + continue + } + if err := BuildWingIndex(brainDir, e.Name()); err != nil { + return fmt.Errorf("index %s: %w", e.Name(), err) + } + } + return nil +} + +func collectWingEntries(wingDir string) ([]noteEntry, error) { + var out []noteEntry + ents, err := os.ReadDir(wingDir) + if err != nil { + return nil, fmt.Errorf("read wing: %w", err) + } + for _, hallEnt := range ents { + if !hallEnt.IsDir() { + continue + } + hall := hallEnt.Name() + if !IsValidHall(hall) { + continue + } + hallDir := filepath.Join(wingDir, hall) + notes, err := os.ReadDir(hallDir) + if err != nil { + return nil, fmt.Errorf("read hall %s: %w", hall, err) + } + for _, n := range notes { + if n.IsDir() || !strings.HasSuffix(n.Name(), ".md") || n.Name() == "_index.md" { + continue + } + slug := strings.TrimSuffix(n.Name(), ".md") + full := filepath.Join(hallDir, n.Name()) + title, created := readTitleAndCreated(full, slug) + out = append(out, noteEntry{Hall: hall, Slug: slug, Title: title, Created: created}) + } + } + return out, nil +} + +// readTitleAndCreated reads YAML frontmatter for title + created_at; falls +// back to slug and file mtime when absent. 
+func readTitleAndCreated(path, slug string) (string, string) { + f, err := os.Open(path) + if err != nil { + return slug, "" + } + defer func() { _ = f.Close() }() + + title, created := "", "" + scanner := bufio.NewScanner(f) + inFrontmatter := false + for scanner.Scan() { + line := scanner.Text() + if strings.TrimSpace(line) == "---" { + if !inFrontmatter { + inFrontmatter = true + continue + } + break + } + if !inFrontmatter { + continue + } + key, val, ok := strings.Cut(line, ":") + if !ok { + continue + } + v := strings.Trim(strings.TrimSpace(val), `"'`) + switch strings.TrimSpace(key) { + case "title": + title = v + case "created_at": + if t, err := time.Parse(time.RFC3339, v); err == nil { + created = t.UTC().Format("2006-01-02") + } else { + created = v + } + } + } + if title == "" { + title = slug + } + if created == "" { + if info, err := os.Stat(path); err == nil { + created = info.ModTime().UTC().Format("2006-01-02") + } + } + return title, created +} diff --git a/ingestion/internal/brain/index_test.go b/ingestion/internal/brain/index_test.go new file mode 100644 index 0000000..cd48276 --- /dev/null +++ b/ingestion/internal/brain/index_test.go @@ -0,0 +1,85 @@ +package brain_test + +import ( + "os" + "path/filepath" + "testing" + + "github.com/mathiasbq/hyperguild/ingestion/internal/brain" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestBuildWingIndex(t *testing.T) { + dir := t.TempDir() + for _, p := range []struct{ rel, body string }{ + {"wiki/jepa-fx/decisions/val-vol.md", "---\ntitle: Val Vol R2\ncreated_at: 2026-05-06T10:00:00Z\n---\nbody\n"}, + {"wiki/jepa-fx/facts/architecture.md", "---\ntitle: Architecture\ncreated_at: 2026-05-04T10:00:00Z\n---\nbody\n"}, + {"wiki/jepa-fx/sources/paper.md", "---\n---\nbody\n"}, + } { + full := filepath.Join(dir, p.rel) + require.NoError(t, os.MkdirAll(filepath.Dir(full), 0o755)) + require.NoError(t, os.WriteFile(full, []byte(p.body), 0o644)) + } + + 
require.NoError(t, brain.BuildWingIndex(dir, "jepa-fx")) + + got, err := os.ReadFile(filepath.Join(dir, "wiki", "jepa-fx", "_index.md")) + require.NoError(t, err) + s := string(got) + assert.Contains(t, s, "# jepa-fx") + assert.Contains(t, s, "| Hall | Note | Created |") + assert.Contains(t, s, "| decisions | [Val Vol R2](decisions/val-vol.md) | 2026-05-06 |") + assert.Contains(t, s, "| facts | [Architecture](facts/architecture.md) | 2026-05-04 |") + assert.Contains(t, s, "| sources | [paper](sources/paper.md) |") + // Halls sorted alphabetically. + assert.Less(t, indexOf(s, "decisions"), indexOf(s, "facts")) + assert.Less(t, indexOf(s, "facts"), indexOf(s, "sources")) +} + +func TestBuildWingIndex_SkipsInvalidHalls(t *testing.T) { + dir := t.TempDir() + wingDir := filepath.Join(dir, "wiki", "jepa-fx") + require.NoError(t, os.MkdirAll(filepath.Join(wingDir, "garbage"), 0o755)) + require.NoError(t, os.WriteFile(filepath.Join(wingDir, "garbage", "x.md"), []byte("x"), 0o644)) + require.NoError(t, os.MkdirAll(filepath.Join(wingDir, "facts"), 0o755)) + require.NoError(t, os.WriteFile(filepath.Join(wingDir, "facts", "y.md"), []byte("y"), 0o644)) + + require.NoError(t, brain.BuildWingIndex(dir, "jepa-fx")) + got, err := os.ReadFile(filepath.Join(wingDir, "_index.md")) + require.NoError(t, err) + s := string(got) + assert.Contains(t, s, "facts") + assert.NotContains(t, s, "garbage") +} + +func TestBuildAllWingIndexes(t *testing.T) { + dir := t.TempDir() + for _, p := range []struct{ rel, body string }{ + {"wiki/a/facts/x.md", "x"}, + {"wiki/b/facts/y.md", "y"}, + } { + full := filepath.Join(dir, p.rel) + require.NoError(t, os.MkdirAll(filepath.Dir(full), 0o755)) + require.NoError(t, os.WriteFile(full, []byte(p.body), 0o644)) + } + require.NoError(t, brain.BuildAllWingIndexes(dir)) + _, err := os.Stat(filepath.Join(dir, "wiki", "a", "_index.md")) + require.NoError(t, err) + _, err = os.Stat(filepath.Join(dir, "wiki", "b", "_index.md")) + require.NoError(t, err) +} + +func 
TestBuildWingIndex_NoWingDir(t *testing.T) { + dir := t.TempDir() + require.NoError(t, brain.BuildWingIndex(dir, "ghost")) +} + +func indexOf(s, sub string) int { + for i := 0; i+len(sub) <= len(s); i++ { + if s[i:i+len(sub)] == sub { + return i + } + } + return -1 +} diff --git a/ingestion/internal/brain/path.go b/ingestion/internal/brain/path.go new file mode 100644 index 0000000..b630f36 --- /dev/null +++ b/ingestion/internal/brain/path.go @@ -0,0 +1,70 @@ +// Package brain provides the wing/hall path taxonomy used by the brain +// wiki layout. A note's canonical location is +// brain/wiki///.md, where Wing is a free-form topic +// domain and Hall is one of a closed vocabulary of memory types. +package brain + +import ( + "fmt" + "path/filepath" + "strings" +) + +// ValidHalls is the closed vocabulary of hall names. A hall captures the +// memory type of a note within any wing. +var ValidHalls = map[string]bool{ + "facts": true, + "decisions": true, + "failures": true, + "hypotheses": true, + "sources": true, +} + +// IsValidHall reports whether h is in the closed Hall vocabulary. +func IsValidHall(h string) bool { + return ValidHalls[h] +} + +// NotePath resolves the canonical filesystem path for a note given a +// wing, hall, and slug. Returns an error if hall is not in ValidHalls +// or if wing/slug sanitise to empty strings. +// +// The returned path is brain/wiki///.md with all +// segments sanitised: lowercased, alphanumerics and hyphens only. 
+func NotePath(brainDir, wing, hall, slug string) (string, error) { + if !IsValidHall(hall) { + return "", fmt.Errorf("invalid hall %q: must be one of facts/decisions/failures/hypotheses/sources", hall) + } + w := Sanitise(wing) + if w == "" { + return "", fmt.Errorf("invalid wing %q: must contain at least one alphanumeric character", wing) + } + s := Sanitise(strings.TrimSuffix(slug, ".md")) + if s == "" { + return "", fmt.Errorf("invalid slug %q: must contain at least one alphanumeric character", slug) + } + return filepath.Join(brainDir, "wiki", w, hall, s+".md"), nil +} + +// Sanitise lowercases s and keeps only [a-z0-9-], collapsing any other +// character (including path separators) to a hyphen. Leading/trailing +// hyphens and runs of hyphens are collapsed. +func Sanitise(s string) string { + s = strings.ToLower(strings.TrimSpace(s)) + var b strings.Builder + prevHyphen := true + for _, r := range s { + switch { + case r >= 'a' && r <= 'z', r >= '0' && r <= '9': + b.WriteRune(r) + prevHyphen = false + case r == '-' || r == '_' || r == ' ' || r == '/' || r == '\\' || r == '.': + if !prevHyphen { + b.WriteByte('-') + prevHyphen = true + } + } + } + out := b.String() + return strings.Trim(out, "-") +} diff --git a/ingestion/internal/brain/path_test.go b/ingestion/internal/brain/path_test.go new file mode 100644 index 0000000..b51bba1 --- /dev/null +++ b/ingestion/internal/brain/path_test.go @@ -0,0 +1,73 @@ +package brain_test + +import ( + "path/filepath" + "testing" + + "github.com/mathiasbq/hyperguild/ingestion/internal/brain" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestNotePath_Valid(t *testing.T) { + got, err := brain.NotePath("/b", "jepa-fx", "decisions", "val-vol-r2") + require.NoError(t, err) + assert.Equal(t, filepath.Join("/b", "wiki", "jepa-fx", "decisions", "val-vol-r2.md"), got) +} + +func TestNotePath_StripsMdSuffix(t *testing.T) { + got, err := brain.NotePath("/b", "x", "facts", "note.md") + 
require.NoError(t, err) + assert.Equal(t, filepath.Join("/b", "wiki", "x", "facts", "note.md"), got) +} + +func TestNotePath_SanitisesWingAndSlug(t *testing.T) { + got, err := brain.NotePath("/b", "Jepa FX!", "facts", "Val Vol R2") + require.NoError(t, err) + assert.Equal(t, filepath.Join("/b", "wiki", "jepa-fx", "facts", "val-vol-r2.md"), got) +} + +func TestNotePath_RejectsInvalidHall(t *testing.T) { + _, err := brain.NotePath("/b", "x", "garbage", "y") + require.Error(t, err) + assert.Contains(t, err.Error(), "invalid hall") +} + +func TestNotePath_RejectsEmptyWing(t *testing.T) { + _, err := brain.NotePath("/b", "!!!", "facts", "y") + require.Error(t, err) + assert.Contains(t, err.Error(), "invalid wing") +} + +func TestNotePath_RejectsEmptySlug(t *testing.T) { + _, err := brain.NotePath("/b", "x", "facts", "!!!") + require.Error(t, err) + assert.Contains(t, err.Error(), "invalid slug") +} + +func TestSanitise(t *testing.T) { + cases := map[string]string{ + "Jepa-FX": "jepa-fx", + " foo bar ": "foo-bar", + "Val/Vol\\R2.md": "val-vol-r2-md", + "!!!": "", + "___leading": "leading", + "trailing___": "trailing", + "multi---hyphen": "multi-hyphen", + "UPPER 123 mixed": "upper-123-mixed", + } + for in, want := range cases { + t.Run(in, func(t *testing.T) { + assert.Equal(t, want, brain.Sanitise(in)) + }) + } +} + +func TestIsValidHall(t *testing.T) { + for _, h := range []string{"facts", "decisions", "failures", "hypotheses", "sources"} { + assert.True(t, brain.IsValidHall(h), h) + } + for _, h := range []string{"", "Facts", "facts ", "rooms", "concepts", "entities"} { + assert.False(t, brain.IsValidHall(h), h) + } +} diff --git a/ingestion/internal/mcp/handlers.go b/ingestion/internal/mcp/handlers.go index 05ede71..70cd920 100644 --- a/ingestion/internal/mcp/handlers.go +++ b/ingestion/internal/mcp/handlers.go @@ -4,11 +4,13 @@ import ( "context" "encoding/json" "fmt" + "log/slog" "path/filepath" "strings" "time" 
"github.com/mathiasbq/hyperguild/ingestion/internal/api" + "github.com/mathiasbq/hyperguild/ingestion/internal/brain" "github.com/mathiasbq/hyperguild/ingestion/internal/extract" "github.com/mathiasbq/hyperguild/ingestion/internal/pipeline" "github.com/mathiasbq/hyperguild/ingestion/internal/search" @@ -24,6 +26,10 @@ func (s *Server) tools() []map[string]any { int_ := func(desc string) map[string]any { return map[string]any{"type": "integer", "description": desc} } + enum := func(desc string, vals ...string) map[string]any { + return map[string]any{"type": "string", "description": desc, "enum": vals} + } + halls := []string{"facts", "decisions", "failures", "hypotheses", "sources"} schema := func(required []string, props map[string]any) json.RawMessage { b, _ := json.Marshal(map[string]any{ "type": "object", "required": required, "properties": props, @@ -34,20 +40,31 @@ func (s *Server) tools() []map[string]any { return []map[string]any{ { "name": "brain_query", - "description": "BM25 full-text search across brain/knowledge/ and brain/wiki/ markdown files.", + "description": "BM25 full-text search across brain/knowledge/ and brain/wiki/ markdown files. Optionally scope by wing (topic domain) and hall (memory type).", "inputSchema": schema([]string{"query"}, map[string]any{ "query": str("search terms"), "limit": int_("max results, default 5"), + "wing": str("optional wing to scope to, e.g. jepa-fx"), + "hall": enum("optional hall to scope to (requires wing)", halls...), }), }, { "name": "brain_write", - "description": "Write a raw knowledge note to brain/knowledge/.", + "description": "Write a markdown note to the brain. 
With wing+hall set, routes to brain/wiki/// with wing/hall/created_at frontmatter; otherwise writes to brain/knowledge/ (legacy).", "inputSchema": schema([]string{"content"}, map[string]any{ "content": str("markdown content"), - "filename": str("optional filename"), - "type": str("optional frontmatter type"), - "domain": str("optional frontmatter domain"), + "filename": str("optional filename or slug"), + "type": str("optional frontmatter type (legacy)"), + "domain": str("optional frontmatter domain (legacy)"), + "wing": str("optional topic domain, e.g. jepa-fx"), + "hall": enum("optional memory type (requires wing)", halls...), + }), + }, + { + "name": "brain_index", + "description": "Regenerate _index.md (Map of Content) for one or all wings under brain/wiki/. Auto-called after brain_write with wing+hall.", + "inputSchema": schema([]string{}, map[string]any{ + "wing": str("optional wing to index; if absent, rebuilds every wing"), }), }, { @@ -104,6 +121,8 @@ func (s *Server) tools() []map[string]any { type brainQueryArgs struct { Query string `json:"query"` Limit int `json:"limit,omitempty"` + Wing string `json:"wing,omitempty"` + Hall string `json:"hall,omitempty"` } func (s *Server) brainQuery(ctx context.Context, args json.RawMessage) (json.RawMessage, error) { @@ -117,7 +136,12 @@ func (s *Server) brainQuery(ctx context.Context, args json.RawMessage) (json.Raw if a.Limit == 0 { a.Limit = 5 } - results, err := search.Query(s.brainDir, a.Query, a.Limit) + results, err := search.Query(s.brainDir, search.QueryOptions{ + Query: a.Query, + Limit: a.Limit, + Wing: a.Wing, + Hall: a.Hall, + }) if err != nil { return nil, fmt.Errorf("search: %w", err) } @@ -129,6 +153,8 @@ type brainWriteArgs struct { Filename string `json:"filename,omitempty"` Type string `json:"type,omitempty"` Domain string `json:"domain,omitempty"` + Wing string `json:"wing,omitempty"` + Hall string `json:"hall,omitempty"` } func (s *Server) brainWrite(ctx context.Context, args json.RawMessage) 
(json.RawMessage, error) { @@ -136,13 +162,51 @@ func (s *Server) brainWrite(ctx context.Context, args json.RawMessage) (json.Raw if err := json.Unmarshal(args, &a); err != nil { return nil, fmt.Errorf("parse args: %w", err) } - relPath, err := api.WriteNote(s.brainDir, a.Content, a.Filename, a.Type, a.Domain) + relPath, err := api.WriteNote(s.brainDir, api.WriteNoteOptions{ + Content: a.Content, + Filename: a.Filename, + Type: a.Type, + Domain: a.Domain, + Wing: a.Wing, + Hall: a.Hall, + }) if err != nil { return nil, err } + // Auto-regenerate the wing _index.md when the write landed in the + // structured wiki. A failure here is best-effort — log and move on, + // since the note itself is already written. + if a.Wing != "" && a.Hall != "" { + if err := brain.BuildWingIndex(s.brainDir, a.Wing); err != nil { + slog.Warn("brain_write: auto-index failed", "wing", a.Wing, "err", err) + } + } return json.Marshal(map[string]string{"path": relPath}) } +type brainIndexArgs struct { + Wing string `json:"wing,omitempty"` +} + +func (s *Server) brainIndex(ctx context.Context, args json.RawMessage) (json.RawMessage, error) { + var a brainIndexArgs + if len(args) > 0 { + if err := json.Unmarshal(args, &a); err != nil { + return nil, fmt.Errorf("parse args: %w", err) + } + } + if a.Wing == "" { + if err := brain.BuildAllWingIndexes(s.brainDir); err != nil { + return nil, fmt.Errorf("index: %w", err) + } + return json.Marshal(map[string]any{"status": "ok", "scope": "all"}) + } + if err := brain.BuildWingIndex(s.brainDir, a.Wing); err != nil { + return nil, fmt.Errorf("index: %w", err) + } + return json.Marshal(map[string]any{"status": "ok", "scope": a.Wing}) +} + type brainIngestRawArgs struct { Source string `json:"source"` Pages []pipeline.RawPage `json:"pages"` diff --git a/ingestion/internal/mcp/handlers_test.go b/ingestion/internal/mcp/handlers_test.go index 018f28e..b3da72d 100644 --- a/ingestion/internal/mcp/handlers_test.go +++ b/ingestion/internal/mcp/handlers_test.go 
@@ -70,6 +70,58 @@ func TestBrainWriteCreatesFile(t *testing.T) { assert.Contains(t, string(got), "# Test") } +func TestBrainWriteWingHallRoutesToWiki(t *testing.T) { + brainDir := t.TempDir() + srv := mcp.NewServer(brainDir, nil, nil, nil) + + resp := toolCall(t, srv, "brain_write", map[string]any{ + "content": "# Val Vol\n\nbody", + "filename": "val-vol-r2", + "wing": "jepa-fx", + "hall": "decisions", + }) + require.Nil(t, resp["error"]) + + got, err := os.ReadFile(filepath.Join(brainDir, "wiki", "jepa-fx", "decisions", "val-vol-r2.md")) + require.NoError(t, err) + assert.Contains(t, string(got), "wing: jepa-fx") + assert.Contains(t, string(got), "hall: decisions") + assert.Contains(t, string(got), "created_at:") + assert.Contains(t, string(got), "# Val Vol") +} + +func TestBrainWriteRejectsInvalidHall(t *testing.T) { + brainDir := t.TempDir() + srv := mcp.NewServer(brainDir, nil, nil, nil) + resp := toolCall(t, srv, "brain_write", map[string]any{ + "content": "x", + "wing": "jepa-fx", + "hall": "garbage", + }) + require.NotNil(t, resp["error"]) +} + +func TestBrainQueryWingScope(t *testing.T) { + brainDir := t.TempDir() + for _, p := range []struct{ rel, body string }{ + {"wiki/jepa-fx/facts/x.md", "---\nwing: jepa-fx\nhall: facts\n---\nfoo keyword.\n"}, + {"wiki/other/facts/y.md", "---\nwing: other\nhall: facts\n---\nfoo keyword.\n"}, + } { + full := filepath.Join(brainDir, p.rel) + require.NoError(t, os.MkdirAll(filepath.Dir(full), 0o755)) + require.NoError(t, os.WriteFile(full, []byte(p.body), 0o644)) + } + srv := mcp.NewServer(brainDir, nil, nil, nil) + resp := toolCall(t, srv, "brain_query", map[string]any{ + "query": "foo", + "wing": "jepa-fx", + }) + require.Nil(t, resp["error"]) + text := resp["result"].(map[string]any)["content"].([]any)[0].(map[string]any)["text"].(string) + assert.Contains(t, text, "wiki/jepa-fx/facts/x.md") + assert.NotContains(t, text, "wiki/other/facts/y.md") +} + func TestBrainWriteRejectsTraversal(t *testing.T) { brainDir := 
t.TempDir() srv := mcp.NewServer(brainDir, nil, nil, nil) diff --git a/ingestion/internal/mcp/server.go b/ingestion/internal/mcp/server.go index 8f4f58e..d60f602 100644 --- a/ingestion/internal/mcp/server.go +++ b/ingestion/internal/mcp/server.go @@ -1,5 +1,6 @@ // Package mcp implements an MCP HTTP handler for the ingestion service. -// Exposed tools: brain_query, brain_write, brain_ingest, brain_ingest_raw, session_log. +// Exposed tools: brain_query, brain_write, brain_index, brain_ingest, +// brain_ingest_raw, brain_answer, brain_classify, session_log. package mcp import ( @@ -136,6 +137,8 @@ func (s *Server) handleCall(ctx context.Context, name string, args json.RawMessa return s.brainQuery(ctx, args) case "brain_write": return s.brainWrite(ctx, args) + case "brain_index": + return s.brainIndex(ctx, args) case "brain_ingest_raw": return s.brainIngestRaw(ctx, args) case "brain_ingest": diff --git a/ingestion/internal/mcp/server_test.go b/ingestion/internal/mcp/server_test.go index 2918b09..678cdba 100644 --- a/ingestion/internal/mcp/server_test.go +++ b/ingestion/internal/mcp/server_test.go @@ -55,7 +55,7 @@ func TestServerToolsList(t *testing.T) { names = append(names, t.(map[string]any)["name"].(string)) } assert.ElementsMatch(t, []string{ - "brain_query", "brain_write", "brain_ingest_raw", "brain_ingest", + "brain_query", "brain_write", "brain_index", "brain_ingest_raw", "brain_ingest", "brain_answer", "brain_classify", "session_log", }, names) } diff --git a/ingestion/internal/mcp/tools_answer.go b/ingestion/internal/mcp/tools_answer.go index edfb299..9233eb8 100644 --- a/ingestion/internal/mcp/tools_answer.go +++ b/ingestion/internal/mcp/tools_answer.go @@ -35,7 +35,7 @@ func (s *Server) brainAnswer(ctx context.Context, args json.RawMessage) (json.Ra return nil, fmt.Errorf("query is required") } - results, err := search.Query(s.brainDir, a.Query, 10) + results, err := search.Query(s.brainDir, search.QueryOptions{Query: a.Query, Limit: 10}) if err != nil { 
return nil, fmt.Errorf("search: %w", err) } diff --git a/ingestion/internal/search/search.go b/ingestion/internal/search/search.go index 071fc46..e43321f 100644 --- a/ingestion/internal/search/search.go +++ b/ingestion/internal/search/search.go @@ -9,6 +9,8 @@ import ( "path/filepath" "sort" "strings" + + "github.com/mathiasbq/hyperguild/ingestion/internal/brain" ) // Result is a single search hit from the brain wiki. @@ -17,24 +19,41 @@ type Result struct { Title string `json:"title"` Excerpt string `json:"excerpt"` Score int `json:"score"` + Wing string `json:"wing,omitempty"` + Hall string `json:"hall,omitempty"` } -// Query searches all .md files under brainDir/wiki/ for pages containing -// any of the whitespace-separated terms in query. Returns up to limit results -// sorted by score descending. -func Query(brainDir, query string, limit int) ([]Result, error) { - if limit <= 0 { - limit = 5 +// QueryOptions configures a search. +// +// When Wing is set, the walk is restricted to brain/wiki//. +// When Hall is additionally set, the walk is restricted to +// brain/wiki///. Without either, the legacy walk over +// brain/knowledge/ and brain/wiki/ is used. +type QueryOptions struct { + Query string + Limit int + Wing string + Hall string +} + +// Query searches the brain. Returns up to opts.Limit results sorted by +// score descending. Empty query returns nil. 
+func Query(brainDir string, opts QueryOptions) ([]Result, error) { + if opts.Limit <= 0 { + opts.Limit = 5 } - terms := strings.Fields(strings.ToLower(query)) + terms := strings.Fields(strings.ToLower(opts.Query)) if len(terms) == 0 { return nil, nil } - var results []Result + roots, err := resolveRoots(brainDir, opts.Wing, opts.Hall) + if err != nil { + return nil, err + } - for _, subdir := range []string{"knowledge", "wiki"} { - dir := filepath.Join(brainDir, subdir) + var results []Result + for _, dir := range roots { if _, statErr := os.Stat(dir); os.IsNotExist(statErr) { continue } @@ -46,13 +65,11 @@ func Query(brainDir, query string, limit int) ([]Result, error) { if d.IsDir() || !strings.HasSuffix(path, ".md") { return nil } - content, err := os.ReadFile(path) if err != nil { slog.Warn("search: skipping unreadable file", "path", path, "err", err) return nil } - lower := strings.ToLower(string(content)) score := 0 for _, term := range terms { @@ -61,18 +78,19 @@ func Query(brainDir, query string, limit int) ([]Result, error) { if score == 0 { return nil } - rel, err := filepath.Rel(brainDir, path) if err != nil { return fmt.Errorf("rel path: %w", err) } rel = filepath.ToSlash(rel) - + wing, hall := extractWingHall(string(content), rel) results = append(results, Result{ Path: rel, Title: extractTitle(string(content), d.Name()), Excerpt: excerpt(string(content), 300), Score: score, + Wing: wing, + Hall: hall, }) return nil }) @@ -84,12 +102,81 @@ func Query(brainDir, query string, limit int) ([]Result, error) { sort.Slice(results, func(i, j int) bool { return results[i].Score > results[j].Score }) - if len(results) > limit { - results = results[:limit] + if len(results) > opts.Limit { + results = results[:opts.Limit] } return results, nil } +// resolveRoots returns the directories to walk for the given wing/hall +// filters. Validates hall against the closed vocabulary when set. 
+func resolveRoots(brainDir, wing, hall string) ([]string, error) {
+	if hall != "" && !brain.IsValidHall(hall) {
+		return nil, fmt.Errorf("invalid hall %q", hall)
+	}
+	if wing != "" {
+		w := brain.Sanitise(wing)
+		if w == "" {
+			return nil, fmt.Errorf("invalid wing %q", wing)
+		}
+		if hall != "" {
+			return []string{filepath.Join(brainDir, "wiki", w, hall)}, nil
+		}
+		return []string{filepath.Join(brainDir, "wiki", w)}, nil
+	}
+	if hall != "" {
+		return nil, fmt.Errorf("hall filter requires wing")
+	}
+	return []string{
+		filepath.Join(brainDir, "knowledge"),
+		filepath.Join(brainDir, "wiki"),
+	}, nil
+}
+
+// extractWingHall returns the wing and hall for a note. Values come from
+// the "wing:" and "hall:" keys of the note's leading frontmatter block; a
+// value that is missing there is taken from relPath when relPath has at
+// least the segments wiki/<wing>/<hall>/<file>. The path-derived hall is
+// used only if brain.IsValidHall accepts it. Surrounding single or double
+// quotes are stripped from frontmatter values.
+func extractWingHall(content, relPath string) (wing, hall string) {
+	scanner := bufio.NewScanner(strings.NewReader(content))
+	// Frontmatter only counts when "---" is the very first line; a "---"
+	// further down is a markdown horizontal rule and must not open a block.
+	if scanner.Scan() && strings.TrimSpace(scanner.Text()) == "---" {
+		for scanner.Scan() {
+			line := scanner.Text()
+			if strings.TrimSpace(line) == "---" {
+				break
+			}
+			key, val, ok := strings.Cut(line, ":")
+			if !ok {
+				continue
+			}
+			v := strings.Trim(strings.TrimSpace(val), `"'`)
+			switch strings.TrimSpace(key) {
+			case "wing":
+				wing = v
+			case "hall":
+				hall = v
+			}
+		}
+	}
+	if wing != "" && hall != "" {
+		return wing, hall
+	}
+	parts := strings.Split(relPath, "/")
+	if len(parts) >= 4 && parts[0] == "wiki" {
+		if wing == "" {
+			wing = parts[1]
+		}
+		if hall == "" && brain.IsValidHall(parts[2]) {
+			hall = parts[2]
+		}
+	}
+	return wing, hall
+}
+
 func extractTitle(content, filename string) string {
 	scanner := bufio.NewScanner(strings.NewReader(content))
 	inFrontmatter := false
@@ -113,7 +200,6 @@ func extractTitle(content, filename string) string {
 }
 
 func excerpt(content string, maxLen int) string {
-	// Skip frontmatter, return first maxLen chars of body.
parts := strings.SplitN(content, "---", 3) body := content if len(parts) == 3 { diff --git a/ingestion/internal/search/search_test.go b/ingestion/internal/search/search_test.go index 45cafc1..1ef4e4c 100644 --- a/ingestion/internal/search/search_test.go +++ b/ingestion/internal/search/search_test.go @@ -27,7 +27,7 @@ func TestSearch_ReturnsMatchingPages(t *testing.T) { 0o644, )) - results, err := search.Query(dir, "retry transient", 5) + results, err := search.Query(dir, search.QueryOptions{Query: "retry transient", Limit: 5}) require.NoError(t, err) require.Len(t, results, 1) assert.Equal(t, "knowledge/retry-logic.md", results[0].Path) @@ -36,6 +36,49 @@ func TestSearch_ReturnsMatchingPages(t *testing.T) { assert.Contains(t, results[0].Excerpt, "Retry") } +func TestSearch_WingHallScoping(t *testing.T) { + dir := t.TempDir() + for _, p := range []struct{ rel, body string }{ + {"wiki/jepa-fx/decisions/val-vol.md", "---\nwing: jepa-fx\nhall: decisions\n---\nval-vol-r2 keyword.\n"}, + {"wiki/jepa-fx/facts/architecture.md", "---\nwing: jepa-fx\nhall: facts\n---\nval-vol-r2 keyword in facts.\n"}, + {"wiki/hyperguild/decisions/routing.md", "---\nwing: hyperguild\nhall: decisions\n---\nval-vol-r2 reference.\n"}, + {"knowledge/loose.md", "---\n---\nval-vol-r2 in knowledge.\n"}, + } { + full := filepath.Join(dir, p.rel) + require.NoError(t, os.MkdirAll(filepath.Dir(full), 0o755)) + require.NoError(t, os.WriteFile(full, []byte(p.body), 0o644)) + } + + // No filter: walk both knowledge/ and wiki/ — all 4 match. + got, err := search.Query(dir, search.QueryOptions{Query: "val-vol-r2", Limit: 10}) + require.NoError(t, err) + assert.Len(t, got, 4) + + // Wing scope: 2 jepa-fx hits, no hyperguild, no knowledge. + got, err = search.Query(dir, search.QueryOptions{Query: "val-vol-r2", Limit: 10, Wing: "jepa-fx"}) + require.NoError(t, err) + require.Len(t, got, 2) + for _, r := range got { + assert.Equal(t, "jepa-fx", r.Wing) + } + + // Wing+Hall scope: 1 hit. 
+ got, err = search.Query(dir, search.QueryOptions{Query: "val-vol-r2", Limit: 10, Wing: "jepa-fx", Hall: "decisions"}) + require.NoError(t, err) + require.Len(t, got, 1) + assert.Equal(t, "jepa-fx", got[0].Wing) + assert.Equal(t, "decisions", got[0].Hall) + assert.Equal(t, "wiki/jepa-fx/decisions/val-vol.md", got[0].Path) + + // Invalid hall rejected. + _, err = search.Query(dir, search.QueryOptions{Query: "x", Wing: "jepa-fx", Hall: "garbage"}) + require.Error(t, err) + + // Hall without wing rejected. + _, err = search.Query(dir, search.QueryOptions{Query: "x", Hall: "facts"}) + require.Error(t, err) +} + func TestSearch_RespectsLimit(t *testing.T) { dir := t.TempDir() require.NoError(t, os.MkdirAll(filepath.Join(dir, "knowledge"), 0o755)) @@ -46,7 +89,7 @@ func TestSearch_RespectsLimit(t *testing.T) { 0o644, )) } - results, err := search.Query(dir, "retry", 3) + results, err := search.Query(dir, search.QueryOptions{Query: "retry", Limit: 3}) require.NoError(t, err) assert.LessOrEqual(t, len(results), 3) } diff --git a/scripts/migrate-brain-halls.sh b/scripts/migrate-brain-halls.sh new file mode 100755 index 0000000..cc98740 --- /dev/null +++ b/scripts/migrate-brain-halls.sh @@ -0,0 +1,135 @@ +#!/usr/bin/env bash +# migrate-brain-halls.sh — move flat brain/wiki/{concepts,entities}/ notes +# into the structured brain/wiki/<wing>/<hall>/ layout introduced by +# hyperguild#1. +# +# Reads each note's YAML frontmatter: +# type: maps to hall (decision, hypothesis, failure, source → eponymous; +# concept, entity, anything else → facts) +# domain: maps to wing (sanitised: lowercase, alphanumerics + hyphens); +# empty → "general" +# +# Dry-run by default. Pass --commit to actually move files. Idempotent: +# already-migrated notes (already under a Wing dir) are left alone.
+#
+# Usage:
+#   scripts/migrate-brain-halls.sh /path/to/brain            # dry-run
+#   scripts/migrate-brain-halls.sh --commit /path/to/brain   # apply
+
+set -euo pipefail
+
+COMMIT=0
+BRAIN=""
+for arg in "$@"; do
+  case "$arg" in
+    --commit) COMMIT=1 ;;
+    -h|--help)
+      sed -n '2,18p' "$0"
+      exit 0
+      ;;
+    *) BRAIN="$arg" ;;
+  esac
+done
+
+if [[ -z "$BRAIN" ]]; then
+  echo "error: brain directory required" >&2
+  echo "usage: $0 [--commit] <brain-dir>" >&2
+  exit 2
+fi
+
+if [[ ! -d "$BRAIN" ]]; then
+  echo "error: $BRAIN is not a directory" >&2
+  exit 2
+fi
+
+WIKI="$BRAIN/wiki"
+if [[ ! -d "$WIKI" ]]; then
+  echo "no $WIKI/ — nothing to migrate"
+  exit 0
+fi
+
+sanitise() {
+  # lowercase, replace non-alnum with hyphen, collapse hyphens, trim
+  local s
+  s=$(printf '%s' "$1" | tr '[:upper:]' '[:lower:]' \
+    | sed -E 's/[^a-z0-9]+/-/g; s/^-+//; s/-+$//; s/-+/-/g')
+  printf '%s' "$s"
+}
+
+# extract_frontmatter_value <file> <key>
+# Echoes the value (trimmed, unquoted) of `key:` from a leading YAML
+# frontmatter block. Empty if absent or no frontmatter.
+extract_frontmatter_value() {
+  awk -v key="$2" '
+    # Frontmatter only counts when "---" is the very first line of the file;
+    # a "---" further down is a markdown horizontal rule, not an opener.
+    # Line 1 either opens the block or ends the scan with no output.
+    NR == 1 && /^---[[:space:]]*$/ { in_fm = 1; next }
+    NR == 1 || /^---[[:space:]]*$/ { exit }
+    in_fm {
+      idx = index($0, ":")
+      if (idx == 0) next
+      k = substr($0, 1, idx-1)
+      v = substr($0, idx+1)
+      gsub(/^[[:space:]]+|[[:space:]]+$/, "", k)
+      gsub(/^[[:space:]]+|[[:space:]]+$/, "", v)
+      gsub(/^["'\'']|["'\'']$/, "", v)
+      if (k == key) { print v; exit }
+    }
+  ' "$1"
+}
+
+hall_for_type() {  # maps a frontmatter type to a hall name; anything unrecognised lands in facts
+  case "$1" in
+    decision|decisions) echo "decisions" ;;
+    hypothesis|hypotheses) echo "hypotheses" ;;
+    failure|failures) echo "failures" ;;
+    source|sources) echo "sources" ;;
+    *) echo "facts" ;;
+  esac
+}
+
+declare -i moved=0 skipped=0
+
+migrate_source_dir() {  # plans (and with --commit performs) the move of every *.md directly under $1
+  local src="$1"
+  [[ -d "$src" ]] || return 0  # a missing source dir is not an error
+  while IFS= read -r -d '' f; do
+    local typ domain wing hall slug dest
+    typ=$(extract_frontmatter_value "$f" type)
+    domain=$(extract_frontmatter_value "$f" domain)
+    hall=$(hall_for_type "$typ")
+    wing=$(sanitise "${domain:-general}")
+    [[ -z "$wing" ]] && wing="general"
+    slug=$(basename "$f" .md)
+    dest="$WIKI/$wing/$hall/$slug.md"
+
+    if [[ "$f" == "$dest" ]]; then
+      skipped=$((skipped + 1))
+      continue
+    fi
+
+    if [[ -e "$dest" ]]; then
+      echo "skip (target exists): $f → $dest"
+      skipped=$((skipped + 1))
+      continue
+    fi
+
+    if [[ "$COMMIT" -eq 1 ]]; then
+      mkdir -p "$(dirname "$dest")"  # git -C runs inside $BRAIN, so git mv gets paths with that prefix stripped
+      git -C "$BRAIN" mv "${f#"$BRAIN"/}" "${dest#"$BRAIN"/}" 2>/dev/null || mv "$f" "$dest"
+    fi
+    echo "move: $f → $dest"
+    moved=$((moved + 1))
+  done < <(find "$src" -maxdepth 1 -type f -name '*.md' -print0)
+}
+
+migrate_source_dir "$WIKI/concepts"
+migrate_source_dir "$WIKI/entities"
+
+echo
+if [[ "$COMMIT" -eq 1 ]]; then
+  echo "moved=$moved skipped=$skipped (committed)"
+else
+  echo "moved=$moved skipped=$skipped (dry-run — pass --commit to apply)"
+fi