Files
hyperguild/ingestion/internal/api/handler.go
Mathias 75685e7b67
All checks were successful
CI / Lint / Test / Vet (push) Successful in 11s
CI / Mirror to GitHub (push) Successful in 4s
feat(brain): structured wing/hall taxonomy + obsidian-compatible layout
Adds a two-dimensional address (wing, hall) to brain notes. A wing is a
topic domain (e.g. jepa-fx, hyperguild); a hall is one of a closed
vocabulary of memory types (facts, decisions, failures, hypotheses,
sources). Notes route to brain/wiki/<wing>/<hall>/<slug>.md with
wing/hall/created_at YAML frontmatter, making the directory a valid
Obsidian vault.

Changes:
- new package ingestion/internal/brain (NotePath, ValidHalls, Sanitise,
  BuildWingIndex, BuildAllWingIndexes)
- api.WriteNote refactored to WriteNoteOptions; wing+hall routes to
  brain/wiki/, otherwise falls back to brain/knowledge/ (legacy)
- search.Query → QueryOptions with optional Wing/Hall filtering; Result
  carries wing/hall extracted from frontmatter or path segments
- MCP tools brain_write and brain_query gain optional wing/hall params
  (hall enum-validated); new brain_index tool regenerates _index.md MOC
- POST /index REST endpoint mirrors brain_index
- brain_write auto-rebuilds the wing's _index.md after a wing+hall write
- scripts/migrate-brain-halls.sh migrates flat brain/wiki/{concepts,entities}/
  into the new layout (dry-run by default, --commit applies)

All existing tests pass; new tests cover wing/hall write routing, scope
filtering, invalid hall rejection, _index.md generation, and migration
script paths.

Closes hyperguild#1.
2026-05-18 20:47:08 +02:00

459 lines
13 KiB
Go

// ingestion/internal/api/handler.go
package api
import (
"encoding/json"
"fmt"
"log/slog"
"net/http"
"os"
"path/filepath"
"strings"
"time"
"github.com/mathiasbq/hyperguild/ingestion/internal/brain"
"github.com/mathiasbq/hyperguild/ingestion/internal/extract"
"github.com/mathiasbq/hyperguild/ingestion/internal/pipeline"
"github.com/mathiasbq/hyperguild/ingestion/internal/search"
)
// Handler serves the ingestion HTTP API.
type Handler struct {
brainDir string
logger *slog.Logger
pipeline pipeline.Config
}
// NewHandler constructs a Handler. brainDir is the absolute path to brain/.
func NewHandler(brainDir string, logger *slog.Logger, pipelineCfg pipeline.Config) *Handler {
if logger == nil {
logger = slog.Default()
}
return &Handler{brainDir: brainDir, logger: logger, pipeline: pipelineCfg}
}
type queryRequest struct {
Query string `json:"query"`
Limit int `json:"limit,omitempty"`
Wing string `json:"wing,omitempty"`
Hall string `json:"hall,omitempty"`
}
type writeRequest struct {
Content string `json:"content"`
Filename string `json:"filename,omitempty"`
Type string `json:"type,omitempty"`
Domain string `json:"domain,omitempty"`
Wing string `json:"wing,omitempty"`
Hall string `json:"hall,omitempty"`
}
type ingestRequest struct {
Content string `json:"content"`
Source string `json:"source"`
DryRun bool `json:"dry_run"`
}
type ingestPathRequest struct {
Path string `json:"path"`
Source string `json:"source"`
DryRun bool `json:"dry_run"`
}
type ingestResponse struct {
Pages []string `json:"pages"`
Warnings []string `json:"warnings"`
}
// Query handles POST /query — full-text search across the brain wiki.
func (h *Handler) Query(w http.ResponseWriter, r *http.Request) {
var req queryRequest
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
writeError(w, http.StatusBadRequest, "invalid JSON")
return
}
if strings.TrimSpace(req.Query) == "" {
writeError(w, http.StatusBadRequest, "query is required")
return
}
if req.Limit == 0 {
req.Limit = 5
}
results, err := search.Query(h.brainDir, search.QueryOptions{
Query: req.Query,
Limit: req.Limit,
Wing: req.Wing,
Hall: req.Hall,
})
if err != nil {
h.logger.Error("query failed", "err", err)
writeError(w, http.StatusInternalServerError, "search error")
return
}
writeJSON(w, map[string]any{"results": results})
}
// WriteNoteOptions configures how a brain note is written.
//
// When both Wing and Hall are non-empty, the note routes into the
// structured wiki at brain/wiki/<wing>/<hall>/<slug>.md and gets
// wing/hall/created_at injected into its YAML frontmatter.
//
// When either is empty, the note falls back to brain/knowledge/<filename>
// with optional type/domain frontmatter (legacy behaviour).
type WriteNoteOptions struct {
Content string
Filename string
Type string
Domain string
Wing string
Hall string
}
// WriteNote writes a markdown note into the brain. Returns the path
// relative to brainDir (forward-slashed). Filename traversal is rejected.
func WriteNote(brainDir string, opts WriteNoteOptions) (string, error) {
if opts.Content == "" {
return "", fmt.Errorf("content is required")
}
if opts.Wing != "" && opts.Hall != "" {
return writeHallNote(brainDir, opts)
}
if opts.Wing != "" || opts.Hall != "" {
return "", fmt.Errorf("wing and hall must be set together")
}
return writeLegacyNote(brainDir, opts)
}
// writeHallNote routes a note into brain/wiki/<wing>/<hall>/ and injects
// wing/hall/created_at frontmatter.
func writeHallNote(brainDir string, opts WriteNoteOptions) (string, error) {
slug := opts.Filename
if slug == "" {
slug = time.Now().UTC().Format("2006-01-02-150405") + "-auto"
}
dest, err := brain.NotePath(brainDir, opts.Wing, opts.Hall, slug)
if err != nil {
return "", err
}
if err := os.MkdirAll(filepath.Dir(dest), 0o755); err != nil {
return "", fmt.Errorf("create hall dir: %w", err)
}
var fm strings.Builder
fm.WriteString("---\n")
fmt.Fprintf(&fm, "wing: %s\n", brain.Sanitise(opts.Wing))
fmt.Fprintf(&fm, "hall: %s\n", opts.Hall)
fmt.Fprintf(&fm, "created_at: %s\n", time.Now().UTC().Format(time.RFC3339))
if opts.Type != "" {
fmt.Fprintf(&fm, "type: %s\n", opts.Type)
}
if opts.Domain != "" {
fmt.Fprintf(&fm, "domain: %s\n", opts.Domain)
}
fm.WriteString("---\n")
if err := os.WriteFile(dest, []byte(fm.String()+opts.Content), 0o644); err != nil {
return "", fmt.Errorf("write: %w", err)
}
rel, _ := filepath.Rel(brainDir, dest)
return filepath.ToSlash(rel), nil
}
// writeLegacyNote preserves the original brain/knowledge/ behaviour for
// callers that have not adopted the wing/hall taxonomy.
func writeLegacyNote(brainDir string, opts WriteNoteOptions) (string, error) {
filename := opts.Filename
if filename == "" {
filename = fmt.Sprintf("%s-auto.md", time.Now().UTC().Format("2006-01-02-150405"))
}
rawDir := filepath.Join(brainDir, "knowledge")
if err := os.MkdirAll(rawDir, 0o755); err != nil {
return "", fmt.Errorf("create raw dir: %w", err)
}
finalContent := opts.Content
if opts.Type != "" || opts.Domain != "" {
var fm strings.Builder
fm.WriteString("---\n")
if opts.Type != "" {
fmt.Fprintf(&fm, "type: %s\n", opts.Type)
}
if opts.Domain != "" {
fmt.Fprintf(&fm, "domain: %s\n", opts.Domain)
}
fm.WriteString("---\n")
finalContent = fm.String() + opts.Content
}
if strings.ContainsAny(filename, `/\`) {
return "", fmt.Errorf("invalid filename")
}
base := filepath.Base(filename)
if base == "." || base == ".." || base == "" {
return "", fmt.Errorf("invalid filename")
}
if !strings.HasSuffix(base, ".md") {
base += ".md"
}
dest := filepath.Join(rawDir, base)
if err := os.WriteFile(dest, []byte(finalContent), 0o644); err != nil {
return "", fmt.Errorf("write: %w", err)
}
rel, _ := filepath.Rel(brainDir, dest)
return filepath.ToSlash(rel), nil
}
// Write handles POST /write — write raw content to brain/knowledge/.
func (h *Handler) Write(w http.ResponseWriter, r *http.Request) {
var req writeRequest
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
writeError(w, http.StatusBadRequest, "invalid JSON")
return
}
relPath, err := WriteNote(h.brainDir, WriteNoteOptions(req))
if err != nil {
h.logger.Error("write failed", "err", err)
writeError(w, http.StatusBadRequest, err.Error())
return
}
if req.Wing != "" && req.Hall != "" {
if err := brain.BuildWingIndex(h.brainDir, req.Wing); err != nil {
h.logger.Warn("auto-index failed", "wing", req.Wing, "err", err)
}
}
writeJSON(w, map[string]string{"path": relPath})
}
type indexRequest struct {
Wing string `json:"wing,omitempty"`
}
// Index handles POST /index — regenerate the _index.md MOC for one wing
// (when "wing" is set) or for every wing (when omitted).
func (h *Handler) Index(w http.ResponseWriter, r *http.Request) {
var req indexRequest
if r.ContentLength > 0 {
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
writeError(w, http.StatusBadRequest, "invalid JSON")
return
}
}
if req.Wing == "" {
if err := brain.BuildAllWingIndexes(h.brainDir); err != nil {
h.logger.Error("index all failed", "err", err)
writeError(w, http.StatusInternalServerError, "index error")
return
}
writeJSON(w, map[string]any{"status": "ok", "scope": "all"})
return
}
if err := brain.BuildWingIndex(h.brainDir, req.Wing); err != nil {
h.logger.Error("index failed", "wing", req.Wing, "err", err)
writeError(w, http.StatusBadRequest, err.Error())
return
}
writeJSON(w, map[string]any{"status": "ok", "scope": req.Wing})
}
// Ingest handles POST /ingest — run the pipeline on provided content.
func (h *Handler) Ingest(w http.ResponseWriter, r *http.Request) {
var req ingestRequest
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
writeError(w, http.StatusBadRequest, "invalid JSON")
return
}
if strings.TrimSpace(req.Content) == "" {
writeError(w, http.StatusBadRequest, "content is required")
return
}
if strings.TrimSpace(req.Source) == "" {
writeError(w, http.StatusBadRequest, "source is required")
return
}
result, err := pipeline.Run(r.Context(), h.pipeline, h.brainDir, req.Content, req.Source, req.DryRun)
if err != nil {
h.logger.Error("ingest failed", "source", req.Source, "err", err)
writeError(w, http.StatusInternalServerError, "ingest error")
return
}
pages := result.Pages
if pages == nil {
pages = []string{}
}
warnings := result.Warnings
if warnings == nil {
warnings = []string{}
}
writeJSON(w, ingestResponse{Pages: pages, Warnings: warnings})
}
// supportedExtensions lists file extensions that IngestPath will process.
var supportedExtensions = map[string]bool{
".md": true,
".txt": true,
".pdf": true,
}
// IngestPath handles POST /ingest-path — ingest a file or directory.
func (h *Handler) IngestPath(w http.ResponseWriter, r *http.Request) {
var req ingestPathRequest
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
writeError(w, http.StatusBadRequest, "invalid JSON")
return
}
if strings.TrimSpace(req.Path) == "" {
writeError(w, http.StatusBadRequest, "path is required")
return
}
info, err := os.Stat(req.Path)
if err != nil {
writeError(w, http.StatusBadRequest, fmt.Sprintf("path not accessible: %v", err))
return
}
var allPages []string
var allWarnings []string
if info.IsDir() {
err = filepath.WalkDir(req.Path, func(path string, d os.DirEntry, walkErr error) error {
if walkErr != nil {
return walkErr
}
if d.IsDir() {
return nil
}
ext := strings.ToLower(filepath.Ext(path))
if !supportedExtensions[ext] {
return nil
}
content, readErr := extract.Text(path)
if readErr != nil {
allWarnings = append(allWarnings, fmt.Sprintf("extract %s: %v", path, readErr))
return nil
}
source := req.Source
if source == "" {
source = filepath.Base(path)
}
result, runErr := pipeline.Run(r.Context(), h.pipeline, h.brainDir, content, source, req.DryRun)
if runErr != nil {
allWarnings = append(allWarnings, fmt.Sprintf("ingest %s: %v", path, runErr))
return nil
}
allPages = append(allPages, result.Pages...)
allWarnings = append(allWarnings, result.Warnings...)
return nil
})
if err != nil {
h.logger.Error("walk dir failed", "path", req.Path, "err", err)
writeError(w, http.StatusInternalServerError, fmt.Sprintf("walk error: %v", err))
return
}
} else {
ext := strings.ToLower(filepath.Ext(req.Path))
if !supportedExtensions[ext] {
writeError(w, http.StatusBadRequest, fmt.Sprintf("unsupported file extension: %s", ext))
return
}
content, readErr := extract.Text(req.Path)
if readErr != nil {
writeError(w, http.StatusInternalServerError, fmt.Sprintf("extract text: %v", readErr))
return
}
source := req.Source
if source == "" {
source = filepath.Base(req.Path)
}
result, runErr := pipeline.Run(r.Context(), h.pipeline, h.brainDir, content, source, req.DryRun)
if runErr != nil {
h.logger.Error("ingest-path failed", "path", req.Path, "err", runErr)
writeError(w, http.StatusInternalServerError, "ingest error")
return
}
allPages = result.Pages
allWarnings = result.Warnings
}
if allPages == nil {
allPages = []string{}
}
if allWarnings == nil {
allWarnings = []string{}
}
writeJSON(w, ingestResponse{Pages: allPages, Warnings: allWarnings})
}
type ingestRawRequest struct {
Source string `json:"source"`
Pages []pipeline.RawPage `json:"pages"`
DryRun bool `json:"dry_run"`
}
// IngestRaw handles POST /ingest-raw — run the pipeline on pre-parsed RawPages,
// skipping the LLM extraction step. Use when the caller has already produced
// structured page data (e.g. from a more capable model or manual curation).
func (h *Handler) IngestRaw(w http.ResponseWriter, r *http.Request) {
var req ingestRawRequest
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
writeError(w, http.StatusBadRequest, "invalid JSON")
return
}
if strings.TrimSpace(req.Source) == "" {
writeError(w, http.StatusBadRequest, "source is required")
return
}
if len(req.Pages) == 0 {
writeError(w, http.StatusBadRequest, "pages is required and must be non-empty")
return
}
result, err := pipeline.RunRaw(h.brainDir, req.Source, req.Pages, req.DryRun)
if err != nil {
h.logger.Error("ingest-raw failed", "source", req.Source, "err", err)
writeError(w, http.StatusInternalServerError, "ingest error")
return
}
pages := result.Pages
if pages == nil {
pages = []string{}
}
warnings := result.Warnings
if warnings == nil {
warnings = []string{}
}
writeJSON(w, ingestResponse{Pages: pages, Warnings: warnings})
}
// BackfillRefs handles POST /backfill-refs — injects source back-references
// into all concept and entity pages based on existing wiki/sources/ pages.
func (h *Handler) BackfillRefs(w http.ResponseWriter, r *http.Request) {
n, err := pipeline.BackfillRefs(r.Context(), h.brainDir)
if err != nil {
h.logger.Error("backfill-refs failed", "err", err)
writeError(w, http.StatusInternalServerError, "backfill error")
return
}
writeJSON(w, map[string]int{"updated": n})
}
func writeJSON(w http.ResponseWriter, v any) {
w.Header().Set("Content-Type", "application/json")
json.NewEncoder(w).Encode(v) //nolint:errcheck
}
func writeError(w http.ResponseWriter, code int, msg string) {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(code)
json.NewEncoder(w).Encode(map[string]string{"error": msg}) //nolint:errcheck
}