// ingestion/internal/pipeline/refs.go package pipeline import ( "os" "path/filepath" "regexp" "strings" "github.com/mathiasbq/hyperguild/ingestion/internal/wiki" ) var wikilinkRE = regexp.MustCompile(`\[\[([^|\]]+)\|`) // injectSourceRefs finds the source page in the proposed batch, extracts its // wikilinks, and injects a back-reference into every linked concept or entity page. // Pages that exist on disk but are not in the current batch are loaded and // appended so they will be updated on write. func injectSourceRefs(pages []wiki.Page, inventory map[wiki.PageType][]wiki.Entry, brainDir string) []wiki.Page { sourceSlug, sourceTitle, found := findSourcePage(pages) if !found { return pages } var sourceContent string for _, p := range pages { if strings.HasPrefix(p.Path, "wiki/sources/") && strings.TrimSuffix(filepath.Base(p.Path), ".md") == sourceSlug { sourceContent = p.Content break } } linkedSlugs := extractWikilinks(sourceContent) sourceRef := "- [[" + sourceSlug + "|" + sourceTitle + "]]" bySlug := make(map[string]int, len(pages)) for i, p := range pages { if !strings.HasPrefix(p.Path, "wiki/sources/") { bySlug[strings.TrimSuffix(filepath.Base(p.Path), ".md")] = i } } for slug := range linkedSlugs { if slug == sourceSlug { continue } if idx, ok := bySlug[slug]; ok { pages[idx] = addSourceRef(pages[idx], sourceRef) continue } pt, ok := findInInventory(slug, inventory) if !ok { continue } diskPath := filepath.Join(brainDir, "wiki", string(pt), slug+".md") b, err := os.ReadFile(diskPath) if err != nil { continue } page := wiki.Page{ Path: "wiki/" + string(pt) + "/" + slug + ".md", Content: string(b), } pages = append(pages, addSourceRef(page, sourceRef)) } return pages } // addSourceRef injects sourceRef into the ## Sources bullet section of page // using wiki.Merge, which deduplicates bullets automatically. func addSourceRef(page wiki.Page, sourceRef string) wiki.Page { patch := wiki.Page{ Path: page.Path, Content: "\n## Sources\n\n" + sourceRef + "\n", } return wiki.Merge(page, patch) } // extractWikilinks returns the set of slugs referenced as [[slug|...]] in content. func extractWikilinks(content string) map[string]bool { slugs := make(map[string]bool) for _, m := range wikilinkRE.FindAllStringSubmatch(content, -1) { slugs[m[1]] = true } return slugs } // findSourcePage returns the slug and title of the first wiki/sources/ page in pages. func findSourcePage(pages []wiki.Page) (slug, title string, found bool) { for _, p := range pages { if strings.HasPrefix(p.Path, "wiki/sources/") { slug = strings.TrimSuffix(filepath.Base(p.Path), ".md") title = extractTitle(p.Content) if title == "" { title = slug } return slug, title, true } } return "", "", false } // findInInventory returns the PageType for a slug if it appears in the inventory. func findInInventory(slug string, inventory map[wiki.PageType][]wiki.Entry) (wiki.PageType, bool) { for pt, entries := range inventory { for _, e := range entries { if e.Slug == slug { return pt, true } } } return "", false }