// ingestion/internal/api/handler.go package api import ( "encoding/json" "fmt" "log/slog" "net/http" "os" "path/filepath" "strings" "time" "github.com/mathiasbq/hyperguild/ingestion/internal/extract" "github.com/mathiasbq/hyperguild/ingestion/internal/pipeline" "github.com/mathiasbq/hyperguild/ingestion/internal/search" ) // Handler serves the ingestion HTTP API. type Handler struct { brainDir string logger *slog.Logger pipeline pipeline.Config } // NewHandler constructs a Handler. brainDir is the absolute path to brain/. func NewHandler(brainDir string, logger *slog.Logger, pipelineCfg pipeline.Config) *Handler { if logger == nil { logger = slog.Default() } return &Handler{brainDir: brainDir, logger: logger, pipeline: pipelineCfg} } type queryRequest struct { Query string `json:"query"` Limit int `json:"limit,omitempty"` } type writeRequest struct { Content string `json:"content"` Filename string `json:"filename,omitempty"` Type string `json:"type,omitempty"` Domain string `json:"domain,omitempty"` } type ingestRequest struct { Content string `json:"content"` Source string `json:"source"` DryRun bool `json:"dry_run"` } type ingestPathRequest struct { Path string `json:"path"` Source string `json:"source"` DryRun bool `json:"dry_run"` } type ingestResponse struct { Pages []string `json:"pages"` Warnings []string `json:"warnings"` } // Query handles POST /query — full-text search across the brain wiki. func (h *Handler) Query(w http.ResponseWriter, r *http.Request) { var req queryRequest if err := json.NewDecoder(r.Body).Decode(&req); err != nil { writeError(w, http.StatusBadRequest, "invalid JSON") return } if strings.TrimSpace(req.Query) == "" { writeError(w, http.StatusBadRequest, "query is required") return } if req.Limit == 0 { req.Limit = 5 } results, err := search.Query(h.brainDir, req.Query, req.Limit) if err != nil { h.logger.Error("query failed", "err", err) writeError(w, http.StatusInternalServerError, "search error") return } writeJSON(w, map[string]any{"results": results}) } // WriteNote writes a markdown file to brainDir/knowledge/, optionally // prefixed with YAML frontmatter built from typ and domain. Returns the path // relative to brainDir (forward-slashed). Filename traversal is rejected. func WriteNote(brainDir, content, filename, typ, domain string) (string, error) { if content == "" { return "", fmt.Errorf("content is required") } if filename == "" { filename = fmt.Sprintf("%s-auto.md", time.Now().UTC().Format("2006-01-02-150405")) } rawDir := filepath.Join(brainDir, "knowledge") if err := os.MkdirAll(rawDir, 0o755); err != nil { return "", fmt.Errorf("create raw dir: %w", err) } finalContent := content if typ != "" || domain != "" { var fm strings.Builder fm.WriteString("---\n") if typ != "" { fmt.Fprintf(&fm, "type: %s\n", typ) } if domain != "" { fmt.Fprintf(&fm, "domain: %s\n", domain) } fm.WriteString("---\n") finalContent = fm.String() + content } // Reject path separators outright; any non-flat filename is misuse. if strings.ContainsAny(filename, `/\`) { return "", fmt.Errorf("invalid filename") } base := filepath.Base(filename) // After Base, "." and ".." remain. Reject those before adding .md. if base == "." || base == ".." || base == "" { return "", fmt.Errorf("invalid filename") } if !strings.HasSuffix(base, ".md") { base += ".md" } dest := filepath.Join(rawDir, base) if err := os.WriteFile(dest, []byte(finalContent), 0o644); err != nil { return "", fmt.Errorf("write: %w", err) } rel, _ := filepath.Rel(brainDir, dest) return filepath.ToSlash(rel), nil } // Write handles POST /write — write raw content to brain/knowledge/. func (h *Handler) Write(w http.ResponseWriter, r *http.Request) { var req writeRequest if err := json.NewDecoder(r.Body).Decode(&req); err != nil { writeError(w, http.StatusBadRequest, "invalid JSON") return } relPath, err := WriteNote(h.brainDir, req.Content, req.Filename, req.Type, req.Domain) if err != nil { h.logger.Error("write failed", "err", err) writeError(w, http.StatusBadRequest, err.Error()) return } writeJSON(w, map[string]string{"path": relPath}) } // Ingest handles POST /ingest — run the pipeline on provided content. func (h *Handler) Ingest(w http.ResponseWriter, r *http.Request) { var req ingestRequest if err := json.NewDecoder(r.Body).Decode(&req); err != nil { writeError(w, http.StatusBadRequest, "invalid JSON") return } if strings.TrimSpace(req.Content) == "" { writeError(w, http.StatusBadRequest, "content is required") return } if strings.TrimSpace(req.Source) == "" { writeError(w, http.StatusBadRequest, "source is required") return } result, err := pipeline.Run(r.Context(), h.pipeline, h.brainDir, req.Content, req.Source, req.DryRun) if err != nil { h.logger.Error("ingest failed", "source", req.Source, "err", err) writeError(w, http.StatusInternalServerError, "ingest error") return } pages := result.Pages if pages == nil { pages = []string{} } warnings := result.Warnings if warnings == nil { warnings = []string{} } writeJSON(w, ingestResponse{Pages: pages, Warnings: warnings}) } // supportedExtensions lists file extensions that IngestPath will process. var supportedExtensions = map[string]bool{ ".md": true, ".txt": true, ".pdf": true, } // IngestPath handles POST /ingest-path — ingest a file or directory. func (h *Handler) IngestPath(w http.ResponseWriter, r *http.Request) { var req ingestPathRequest if err := json.NewDecoder(r.Body).Decode(&req); err != nil { writeError(w, http.StatusBadRequest, "invalid JSON") return } if strings.TrimSpace(req.Path) == "" { writeError(w, http.StatusBadRequest, "path is required") return } info, err := os.Stat(req.Path) if err != nil { writeError(w, http.StatusBadRequest, fmt.Sprintf("path not accessible: %v", err)) return } var allPages []string var allWarnings []string if info.IsDir() { err = filepath.WalkDir(req.Path, func(path string, d os.DirEntry, walkErr error) error { if walkErr != nil { return walkErr } if d.IsDir() { return nil } ext := strings.ToLower(filepath.Ext(path)) if !supportedExtensions[ext] { return nil } content, readErr := extract.Text(path) if readErr != nil { allWarnings = append(allWarnings, fmt.Sprintf("extract %s: %v", path, readErr)) return nil } source := req.Source if source == "" { source = filepath.Base(path) } result, runErr := pipeline.Run(r.Context(), h.pipeline, h.brainDir, content, source, req.DryRun) if runErr != nil { allWarnings = append(allWarnings, fmt.Sprintf("ingest %s: %v", path, runErr)) return nil } allPages = append(allPages, result.Pages...) allWarnings = append(allWarnings, result.Warnings...) return nil }) if err != nil { h.logger.Error("walk dir failed", "path", req.Path, "err", err) writeError(w, http.StatusInternalServerError, fmt.Sprintf("walk error: %v", err)) return } } else { ext := strings.ToLower(filepath.Ext(req.Path)) if !supportedExtensions[ext] { writeError(w, http.StatusBadRequest, fmt.Sprintf("unsupported file extension: %s", ext)) return } content, readErr := extract.Text(req.Path) if readErr != nil { writeError(w, http.StatusInternalServerError, fmt.Sprintf("extract text: %v", readErr)) return } source := req.Source if source == "" { source = filepath.Base(req.Path) } result, runErr := pipeline.Run(r.Context(), h.pipeline, h.brainDir, content, source, req.DryRun) if runErr != nil { h.logger.Error("ingest-path failed", "path", req.Path, "err", runErr) writeError(w, http.StatusInternalServerError, "ingest error") return } allPages = result.Pages allWarnings = result.Warnings } if allPages == nil { allPages = []string{} } if allWarnings == nil { allWarnings = []string{} } writeJSON(w, ingestResponse{Pages: allPages, Warnings: allWarnings}) } type ingestRawRequest struct { Source string `json:"source"` Pages []pipeline.RawPage `json:"pages"` DryRun bool `json:"dry_run"` } // IngestRaw handles POST /ingest-raw — run the pipeline on pre-parsed RawPages, // skipping the LLM extraction step. Use when the caller has already produced // structured page data (e.g. from a more capable model or manual curation). func (h *Handler) IngestRaw(w http.ResponseWriter, r *http.Request) { var req ingestRawRequest if err := json.NewDecoder(r.Body).Decode(&req); err != nil { writeError(w, http.StatusBadRequest, "invalid JSON") return } if strings.TrimSpace(req.Source) == "" { writeError(w, http.StatusBadRequest, "source is required") return } if len(req.Pages) == 0 { writeError(w, http.StatusBadRequest, "pages is required and must be non-empty") return } result, err := pipeline.RunRaw(h.brainDir, req.Source, req.Pages, req.DryRun) if err != nil { h.logger.Error("ingest-raw failed", "source", req.Source, "err", err) writeError(w, http.StatusInternalServerError, "ingest error") return } pages := result.Pages if pages == nil { pages = []string{} } warnings := result.Warnings if warnings == nil { warnings = []string{} } writeJSON(w, ingestResponse{Pages: pages, Warnings: warnings}) } // BackfillRefs handles POST /backfill-refs — injects source back-references // into all concept and entity pages based on existing wiki/sources/ pages. func (h *Handler) BackfillRefs(w http.ResponseWriter, r *http.Request) { n, err := pipeline.BackfillRefs(r.Context(), h.brainDir) if err != nil { h.logger.Error("backfill-refs failed", "err", err) writeError(w, http.StatusInternalServerError, "backfill error") return } writeJSON(w, map[string]int{"updated": n}) } func writeJSON(w http.ResponseWriter, v any) { w.Header().Set("Content-Type", "application/json") json.NewEncoder(w).Encode(v) //nolint:errcheck } func writeError(w http.ResponseWriter, code int, msg string) { w.Header().Set("Content-Type", "application/json") w.WriteHeader(code) json.NewEncoder(w).Encode(map[string]string{"error": msg}) //nolint:errcheck }