// ingestion/internal/api/handler.go package api import ( "encoding/json" "fmt" "log/slog" "net/http" "os" "path/filepath" "strings" "time" "github.com/mathiasbq/hyperguild/ingestion/internal/brain" "github.com/mathiasbq/hyperguild/ingestion/internal/extract" "github.com/mathiasbq/hyperguild/ingestion/internal/pipeline" "github.com/mathiasbq/hyperguild/ingestion/internal/search" "github.com/mathiasbq/hyperguild/ingestion/internal/vectorstore" ) // Handler serves the ingestion HTTP API. type Handler struct { brainDir string logger *slog.Logger pipeline pipeline.Config embedStore vectorstore.Store embedClient vectorstore.Embedder } // NewHandler constructs a Handler. brainDir is the absolute path to brain/. func NewHandler(brainDir string, logger *slog.Logger, pipelineCfg pipeline.Config) *Handler { if logger == nil { logger = slog.Default() } return &Handler{brainDir: brainDir, logger: logger, pipeline: pipelineCfg} } // WithEmbedSync wires the optional vector store + embedder used by the // POST /backfill-embeddings endpoint. Calling with either nil is a no-op. func (h *Handler) WithEmbedSync(store vectorstore.Store, embedder vectorstore.Embedder) *Handler { h.embedStore = store h.embedClient = embedder return h } type queryRequest struct { Query string `json:"query"` Limit int `json:"limit,omitempty"` Wing string `json:"wing,omitempty"` Hall string `json:"hall,omitempty"` } type writeRequest struct { Content string `json:"content"` Filename string `json:"filename,omitempty"` Type string `json:"type,omitempty"` Domain string `json:"domain,omitempty"` Wing string `json:"wing,omitempty"` Hall string `json:"hall,omitempty"` } type ingestRequest struct { Content string `json:"content"` Source string `json:"source"` DryRun bool `json:"dry_run"` } type ingestPathRequest struct { Path string `json:"path"` Source string `json:"source"` DryRun bool `json:"dry_run"` } type ingestResponse struct { Pages []string `json:"pages"` Warnings []string `json:"warnings"` } // Query handles POST /query — full-text search across the brain wiki. func (h *Handler) Query(w http.ResponseWriter, r *http.Request) { var req queryRequest if err := json.NewDecoder(r.Body).Decode(&req); err != nil { writeError(w, http.StatusBadRequest, "invalid JSON") return } if strings.TrimSpace(req.Query) == "" { writeError(w, http.StatusBadRequest, "query is required") return } if req.Limit == 0 { req.Limit = 5 } results, err := search.Query(h.brainDir, search.QueryOptions{ Query: req.Query, Limit: req.Limit, Wing: req.Wing, Hall: req.Hall, }) if err != nil { h.logger.Error("query failed", "err", err) writeError(w, http.StatusInternalServerError, "search error") return } writeJSON(w, map[string]any{"results": results}) } // WriteNoteOptions configures how a brain note is written. // // When both Wing and Hall are non-empty, the note routes into the // structured wiki at brain/wiki///.md and gets // wing/hall/created_at injected into its YAML frontmatter. // // When either is empty, the note falls back to brain/knowledge/ // with optional type/domain frontmatter (legacy behaviour). type WriteNoteOptions struct { Content string Filename string Type string Domain string Wing string Hall string } // WriteNote writes a markdown note into the brain. Returns the path // relative to brainDir (forward-slashed). Filename traversal is rejected. func WriteNote(brainDir string, opts WriteNoteOptions) (string, error) { if opts.Content == "" { return "", fmt.Errorf("content is required") } if opts.Wing != "" && opts.Hall != "" { return writeHallNote(brainDir, opts) } if opts.Wing != "" || opts.Hall != "" { return "", fmt.Errorf("wing and hall must be set together") } return writeLegacyNote(brainDir, opts) } // writeHallNote routes a note into brain/wiki/// and injects // wing/hall/created_at frontmatter. func writeHallNote(brainDir string, opts WriteNoteOptions) (string, error) { slug := opts.Filename if slug == "" { slug = time.Now().UTC().Format("2006-01-02-150405") + "-auto" } dest, err := brain.NotePath(brainDir, opts.Wing, opts.Hall, slug) if err != nil { return "", err } if err := os.MkdirAll(filepath.Dir(dest), 0o755); err != nil { return "", fmt.Errorf("create hall dir: %w", err) } var fm strings.Builder fm.WriteString("---\n") fmt.Fprintf(&fm, "wing: %s\n", brain.Sanitise(opts.Wing)) fmt.Fprintf(&fm, "hall: %s\n", opts.Hall) fmt.Fprintf(&fm, "created_at: %s\n", time.Now().UTC().Format(time.RFC3339)) if opts.Type != "" { fmt.Fprintf(&fm, "type: %s\n", opts.Type) } if opts.Domain != "" { fmt.Fprintf(&fm, "domain: %s\n", opts.Domain) } fm.WriteString("---\n") if err := os.WriteFile(dest, []byte(fm.String()+opts.Content), 0o644); err != nil { return "", fmt.Errorf("write: %w", err) } rel, _ := filepath.Rel(brainDir, dest) return filepath.ToSlash(rel), nil } // writeLegacyNote preserves the original brain/knowledge/ behaviour for // callers that have not adopted the wing/hall taxonomy. func writeLegacyNote(brainDir string, opts WriteNoteOptions) (string, error) { filename := opts.Filename if filename == "" { filename = fmt.Sprintf("%s-auto.md", time.Now().UTC().Format("2006-01-02-150405")) } rawDir := filepath.Join(brainDir, "knowledge") if err := os.MkdirAll(rawDir, 0o755); err != nil { return "", fmt.Errorf("create raw dir: %w", err) } finalContent := opts.Content if opts.Type != "" || opts.Domain != "" { var fm strings.Builder fm.WriteString("---\n") if opts.Type != "" { fmt.Fprintf(&fm, "type: %s\n", opts.Type) } if opts.Domain != "" { fmt.Fprintf(&fm, "domain: %s\n", opts.Domain) } fm.WriteString("---\n") finalContent = fm.String() + opts.Content } if strings.ContainsAny(filename, `/\`) { return "", fmt.Errorf("invalid filename") } base := filepath.Base(filename) if base == "." || base == ".." || base == "" { return "", fmt.Errorf("invalid filename") } if !strings.HasSuffix(base, ".md") { base += ".md" } dest := filepath.Join(rawDir, base) if err := os.WriteFile(dest, []byte(finalContent), 0o644); err != nil { return "", fmt.Errorf("write: %w", err) } rel, _ := filepath.Rel(brainDir, dest) return filepath.ToSlash(rel), nil } // Write handles POST /write — write raw content to brain/knowledge/. func (h *Handler) Write(w http.ResponseWriter, r *http.Request) { var req writeRequest if err := json.NewDecoder(r.Body).Decode(&req); err != nil { writeError(w, http.StatusBadRequest, "invalid JSON") return } relPath, err := WriteNote(h.brainDir, WriteNoteOptions(req)) if err != nil { h.logger.Error("write failed", "err", err) writeError(w, http.StatusBadRequest, err.Error()) return } if req.Wing != "" && req.Hall != "" { if err := brain.BuildWingIndex(h.brainDir, req.Wing); err != nil { h.logger.Warn("auto-index failed", "wing", req.Wing, "err", err) } } writeJSON(w, map[string]string{"path": relPath}) } // BackfillEmbeddings handles POST /backfill-embeddings — synchronously // embeds every note under brain/wiki/ that's not yet in the vector // store, and deletes rows for files no longer on disk. func (h *Handler) BackfillEmbeddings(w http.ResponseWriter, r *http.Request) { if h.embedStore == nil || h.embedClient == nil { writeError(w, http.StatusServiceUnavailable, "embeddings not configured (set BRAIN_PG_DSN and BRAIN_EMBED_URL)") return } res, err := vectorstore.Sync(r.Context(), h.brainDir, h.embedStore, h.embedClient) if err != nil { h.logger.Error("backfill failed", "err", err) writeError(w, http.StatusInternalServerError, "backfill error") return } errStrs := make([]string, 0, len(res.Errors)) for _, e := range res.Errors { errStrs = append(errStrs, e.Error()) } writeJSON(w, map[string]any{ "added": res.Added, "deleted": res.Deleted, "errors": errStrs, }) } type indexRequest struct { Wing string `json:"wing,omitempty"` } // Index handles POST /index — regenerate the _index.md MOC for one wing // (when "wing" is set) or for every wing (when omitted). func (h *Handler) Index(w http.ResponseWriter, r *http.Request) { var req indexRequest if r.ContentLength > 0 { if err := json.NewDecoder(r.Body).Decode(&req); err != nil { writeError(w, http.StatusBadRequest, "invalid JSON") return } } if req.Wing == "" { if err := brain.BuildAllWingIndexes(h.brainDir); err != nil { h.logger.Error("index all failed", "err", err) writeError(w, http.StatusInternalServerError, "index error") return } writeJSON(w, map[string]any{"status": "ok", "scope": "all"}) return } if err := brain.BuildWingIndex(h.brainDir, req.Wing); err != nil { h.logger.Error("index failed", "wing", req.Wing, "err", err) writeError(w, http.StatusBadRequest, err.Error()) return } writeJSON(w, map[string]any{"status": "ok", "scope": req.Wing}) } // Ingest handles POST /ingest — run the pipeline on provided content. func (h *Handler) Ingest(w http.ResponseWriter, r *http.Request) { var req ingestRequest if err := json.NewDecoder(r.Body).Decode(&req); err != nil { writeError(w, http.StatusBadRequest, "invalid JSON") return } if strings.TrimSpace(req.Content) == "" { writeError(w, http.StatusBadRequest, "content is required") return } if strings.TrimSpace(req.Source) == "" { writeError(w, http.StatusBadRequest, "source is required") return } result, err := pipeline.Run(r.Context(), h.pipeline, h.brainDir, req.Content, req.Source, req.DryRun) if err != nil { h.logger.Error("ingest failed", "source", req.Source, "err", err) writeError(w, http.StatusInternalServerError, "ingest error") return } pages := result.Pages if pages == nil { pages = []string{} } warnings := result.Warnings if warnings == nil { warnings = []string{} } writeJSON(w, ingestResponse{Pages: pages, Warnings: warnings}) } // supportedExtensions lists file extensions that IngestPath will process. var supportedExtensions = map[string]bool{ ".md": true, ".txt": true, ".pdf": true, } // IngestPath handles POST /ingest-path — ingest a file or directory. func (h *Handler) IngestPath(w http.ResponseWriter, r *http.Request) { var req ingestPathRequest if err := json.NewDecoder(r.Body).Decode(&req); err != nil { writeError(w, http.StatusBadRequest, "invalid JSON") return } if strings.TrimSpace(req.Path) == "" { writeError(w, http.StatusBadRequest, "path is required") return } info, err := os.Stat(req.Path) if err != nil { writeError(w, http.StatusBadRequest, fmt.Sprintf("path not accessible: %v", err)) return } var allPages []string var allWarnings []string if info.IsDir() { err = filepath.WalkDir(req.Path, func(path string, d os.DirEntry, walkErr error) error { if walkErr != nil { return walkErr } if d.IsDir() { return nil } ext := strings.ToLower(filepath.Ext(path)) if !supportedExtensions[ext] { return nil } content, readErr := extract.Text(path) if readErr != nil { allWarnings = append(allWarnings, fmt.Sprintf("extract %s: %v", path, readErr)) return nil } source := req.Source if source == "" { source = filepath.Base(path) } result, runErr := pipeline.Run(r.Context(), h.pipeline, h.brainDir, content, source, req.DryRun) if runErr != nil { allWarnings = append(allWarnings, fmt.Sprintf("ingest %s: %v", path, runErr)) return nil } allPages = append(allPages, result.Pages...) allWarnings = append(allWarnings, result.Warnings...) return nil }) if err != nil { h.logger.Error("walk dir failed", "path", req.Path, "err", err) writeError(w, http.StatusInternalServerError, fmt.Sprintf("walk error: %v", err)) return } } else { ext := strings.ToLower(filepath.Ext(req.Path)) if !supportedExtensions[ext] { writeError(w, http.StatusBadRequest, fmt.Sprintf("unsupported file extension: %s", ext)) return } content, readErr := extract.Text(req.Path) if readErr != nil { writeError(w, http.StatusInternalServerError, fmt.Sprintf("extract text: %v", readErr)) return } source := req.Source if source == "" { source = filepath.Base(req.Path) } result, runErr := pipeline.Run(r.Context(), h.pipeline, h.brainDir, content, source, req.DryRun) if runErr != nil { h.logger.Error("ingest-path failed", "path", req.Path, "err", runErr) writeError(w, http.StatusInternalServerError, "ingest error") return } allPages = result.Pages allWarnings = result.Warnings } if allPages == nil { allPages = []string{} } if allWarnings == nil { allWarnings = []string{} } writeJSON(w, ingestResponse{Pages: allPages, Warnings: allWarnings}) } type ingestRawRequest struct { Source string `json:"source"` Pages []pipeline.RawPage `json:"pages"` DryRun bool `json:"dry_run"` } // IngestRaw handles POST /ingest-raw — run the pipeline on pre-parsed RawPages, // skipping the LLM extraction step. Use when the caller has already produced // structured page data (e.g. from a more capable model or manual curation). func (h *Handler) IngestRaw(w http.ResponseWriter, r *http.Request) { var req ingestRawRequest if err := json.NewDecoder(r.Body).Decode(&req); err != nil { writeError(w, http.StatusBadRequest, "invalid JSON") return } if strings.TrimSpace(req.Source) == "" { writeError(w, http.StatusBadRequest, "source is required") return } if len(req.Pages) == 0 { writeError(w, http.StatusBadRequest, "pages is required and must be non-empty") return } result, err := pipeline.RunRaw(h.brainDir, req.Source, req.Pages, req.DryRun) if err != nil { h.logger.Error("ingest-raw failed", "source", req.Source, "err", err) writeError(w, http.StatusInternalServerError, "ingest error") return } pages := result.Pages if pages == nil { pages = []string{} } warnings := result.Warnings if warnings == nil { warnings = []string{} } writeJSON(w, ingestResponse{Pages: pages, Warnings: warnings}) } // BackfillRefs handles POST /backfill-refs — injects source back-references // into all concept and entity pages based on existing wiki/sources/ pages. func (h *Handler) BackfillRefs(w http.ResponseWriter, r *http.Request) { n, err := pipeline.BackfillRefs(r.Context(), h.brainDir) if err != nil { h.logger.Error("backfill-refs failed", "err", err) writeError(w, http.StatusInternalServerError, "backfill error") return } writeJSON(w, map[string]int{"updated": n}) } func writeJSON(w http.ResponseWriter, v any) { w.Header().Set("Content-Type", "application/json") json.NewEncoder(w).Encode(v) //nolint:errcheck } func writeError(w http.ResponseWriter, code int, msg string) { w.Header().Set("Content-Type", "application/json") w.WriteHeader(code) json.NewEncoder(w).Encode(map[string]string{"error": msg}) //nolint:errcheck }