// ingestion/internal/pipeline/resolve.go package pipeline import ( "path/filepath" "strings" "github.com/mathiasbq/hyperguild/ingestion/internal/wiki" ) // Resolve remaps proposed pages to existing slugs when a fuzzy title match is found. // It only matches within the same page type (entities→entities, concepts→concepts). // Pages with no inventory match are returned unchanged. func Resolve(proposed []wiki.Page, inventory map[wiki.PageType][]wiki.Entry) []wiki.Page { type key struct { pt wiki.PageType normalized string } lookup := make(map[key]string) // key → canonical slug for pt, entries := range inventory { for _, e := range entries { k := key{pt: pt, normalized: normalizeTitle(e.Title)} lookup[k] = e.Slug for _, alias := range e.Aliases { ak := key{pt: pt, normalized: normalizeTitle(alias)} if _, exists := lookup[ak]; !exists { lookup[ak] = e.Slug } } } } out := make([]wiki.Page, 0, len(proposed)) for _, page := range proposed { pt := pageTypeFromPath(page.Path) title := extractTitle(page.Content) k := key{pt: pt, normalized: normalizeTitle(title)} if canonicalSlug, ok := lookup[k]; ok { dir := filepath.Dir(page.Path) page.Path = dir + "/" + canonicalSlug + ".md" } out = append(out, page) } return out } // normalizeTitle lowercases, removes leading articles, collapses whitespace. // "The Shape Up Method" → "shape up method" func normalizeTitle(s string) string { s = strings.ToLower(strings.TrimSpace(s)) for _, article := range []string{"the ", "a ", "an "} { s = strings.TrimPrefix(s, article) } s = strings.ReplaceAll(s, "-", " ") return strings.Join(strings.Fields(s), " ") } // pageTypeFromPath extracts the wiki.PageType from a path like "wiki/entities/foo.md". func pageTypeFromPath(path string) wiki.PageType { parts := strings.Split(filepath.ToSlash(path), "/") if len(parts) >= 2 { return wiki.PageType(parts[1]) } return "" } // extractTitle reads the title field from YAML frontmatter in content. // Falls back to empty string if not found. 
func extractTitle(content string) string {
	// Only the head of the document is inspected; frontmatter that has not
	// produced a title within this many lines is treated as having none.
	const maxLines = 30

	rest := content
	inFrontmatter := false
	for i := 0; i < maxLines && rest != ""; i++ {
		// Peel off exactly one line per iteration so that a value can never
		// span past its own line, even on the last inspected line.
		var line string
		line, rest, _ = strings.Cut(rest, "\n")

		if strings.TrimSpace(line) == "---" {
			if inFrontmatter {
				// Closing delimiter: the frontmatter had no title field.
				break
			}
			inFrontmatter = true
			continue
		}
		if !inFrontmatter {
			continue
		}

		field, val, ok := strings.Cut(line, ":")
		if !ok || strings.TrimSpace(field) != "title" {
			continue
		}
		val = strings.TrimSpace(val)
		// Strip one pair of matching surrounding quotes only, so an
		// apostrophe or quote that belongs to the title itself survives.
		if n := len(val); n >= 2 && (val[0] == '"' || val[0] == '\'') && val[n-1] == val[0] {
			val = val[1 : n-1]
		}
		return val
	}
	return ""
}