// ingestion/internal/pipeline/backfill.go package pipeline import ( "context" "fmt" "os" "path/filepath" "strings" "github.com/mathiasbq/hyperguild/ingestion/internal/wiki" ) // BackfillRefs walks wiki/sources/ and injects source back-references into every // concept and entity page that each source links to. // Changes for all sources are accumulated in memory before writing, so multiple // sources referencing the same concept are merged in one pass. // Deduplication is handled by wiki.Merge — running this multiple times is safe. // Returns the number of concept/entity pages written. func BackfillRefs(ctx context.Context, brainDir string) (int, error) { inventory, err := wiki.LoadInventory(brainDir) if err != nil { return 0, fmt.Errorf("load inventory: %w", err) } sourcesDir := filepath.Join(brainDir, "wiki", "sources") entries, err := os.ReadDir(sourcesDir) if err != nil { if os.IsNotExist(err) { return 0, nil } return 0, fmt.Errorf("read sources dir: %w", err) } // Accumulate all changes before writing: relPath → updated Page. // Collecting first means two sources that both link the same concept // get both refs merged before a single write. pending := make(map[string]wiki.Page) for _, e := range entries { if ctx.Err() != nil { return 0, ctx.Err() } if e.IsDir() || !strings.HasSuffix(e.Name(), ".md") { continue } b, err := os.ReadFile(filepath.Join(sourcesDir, e.Name())) if err != nil { continue } sourceContent := string(b) sourceSlug := strings.TrimSuffix(e.Name(), ".md") sourceTitle := extractTitle(sourceContent) if sourceTitle == "" { sourceTitle = sourceSlug } sourceRef := "- [[" + sourceSlug + "|" + sourceTitle + "]]" for slug := range extractWikilinks(sourceContent) { if slug == sourceSlug { continue } pt, ok := findInInventory(slug, inventory) if !ok { continue } relPath := "wiki/" + string(pt) + "/" + slug + ".md" // Start from already-accumulated version if we've seen this page. page, seen := pending[relPath] if !seen { raw, err := os.ReadFile(filepath.Join(brainDir, filepath.FromSlash(relPath))) if err != nil { continue } page = wiki.Page{Path: relPath, Content: string(raw)} } pending[relPath] = addSourceRef(page, sourceRef) } } for relPath, page := range pending { dest := filepath.Join(brainDir, filepath.FromSlash(relPath)) if err := os.WriteFile(dest, []byte(page.Content), 0o644); err != nil { return 0, fmt.Errorf("write %s: %w", relPath, err) } } return len(pending), nil }