// ingestion/internal/pipeline/build.go package pipeline import ( "fmt" "strings" "github.com/mathiasbq/hyperguild/ingestion/internal/wiki" ) // BuildPages converts RawPages from the LLM into wiki.Pages with computed slugs, // paths, and YAML frontmatter. sourceSlug is the slug of the source being ingested // (derived from the filename, not the LLM title). func BuildPages(rawPages []RawPage, sourceSlug, date string) []wiki.Page { out := make([]wiki.Page, 0, len(rawPages)) for _, rp := range rawPages { out = append(out, buildPage(rp, sourceSlug, date)) } return out } func buildPage(rp RawPage, sourceSlug, date string) wiki.Page { var slug, dir string switch rp.Type { case "source": slug = sourceSlug dir = "wiki/sources" case "concept": slug = wiki.Slug(rp.Title) dir = "wiki/concepts" case "entity": slug = wiki.Slug(rp.Title) dir = "wiki/entities" default: slug = wiki.Slug(rp.Title) dir = "wiki/" + rp.Type } path := dir + "/" + slug + ".md" fm := buildFrontmatter(rp, date) return wiki.Page{ Path: path, Content: fm + "\n" + rp.Content, } } func buildFrontmatter(rp RawPage, date string) string { var sb strings.Builder sb.WriteString("---\n") fmt.Fprintf(&sb, "title: %s\n", yamlScalar(rp.Title)) switch rp.Type { case "source": subtype := rp.Subtype if subtype == "" { subtype = "article" } fmt.Fprintf(&sb, "type: %s\n", yamlScalar(subtype)) if rp.Domain != "" { fmt.Fprintf(&sb, "domain: %s\n", yamlScalar(rp.Domain)) } fmt.Fprintf(&sb, "date_ingested: %s\n", date) fmt.Fprintf(&sb, "last_updated: %s\n", date) case "concept": if rp.Domain != "" { fmt.Fprintf(&sb, "domain: %s\n", yamlScalar(rp.Domain)) } fmt.Fprintf(&sb, "last_updated: %s\n", date) case "entity": if rp.Subtype != "" { fmt.Fprintf(&sb, "type: %s\n", yamlScalar(rp.Subtype)) } if rp.Domain != "" { fmt.Fprintf(&sb, "domain: %s\n", yamlScalar(rp.Domain)) } fmt.Fprintf(&sb, "last_updated: %s\n", date) default: if rp.Domain != "" { fmt.Fprintf(&sb, "domain: %s\n", yamlScalar(rp.Domain)) } fmt.Fprintf(&sb, "last_updated: %s\n", date) } fmt.Fprintf(&sb, "aliases:\n - %s\n", yamlScalar(rp.Title)) sb.WriteString("---\n") return sb.String() } func yamlScalar(s string) string { return "'" + strings.ReplaceAll(s, "'", "''") + "'" }