feat(pipeline): add BuildPages — compute slugs/paths/frontmatter from RawPage
This commit is contained in:
88
ingestion/internal/pipeline/build.go
Normal file
88
ingestion/internal/pipeline/build.go
Normal file
@@ -0,0 +1,88 @@
|
||||
// ingestion/internal/pipeline/build.go
|
||||
package pipeline
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/mathiasbq/hyperguild/ingestion/internal/wiki"
|
||||
)
|
||||
|
||||
// BuildPages converts RawPages from the LLM into wiki.Pages with computed slugs,
|
||||
// paths, and YAML frontmatter. sourceSlug is the slug of the source being ingested
|
||||
// (derived from the filename, not the LLM title).
|
||||
func BuildPages(rawPages []RawPage, sourceSlug, date string) []wiki.Page {
|
||||
out := make([]wiki.Page, 0, len(rawPages))
|
||||
for _, rp := range rawPages {
|
||||
out = append(out, buildPage(rp, sourceSlug, date))
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func buildPage(rp RawPage, sourceSlug, date string) wiki.Page {
|
||||
var slug, dir string
|
||||
switch rp.Type {
|
||||
case "source":
|
||||
slug = sourceSlug
|
||||
dir = "wiki/sources"
|
||||
case "concept":
|
||||
slug = wiki.Slug(rp.Title)
|
||||
dir = "wiki/concepts"
|
||||
case "entity":
|
||||
slug = wiki.Slug(rp.Title)
|
||||
dir = "wiki/entities"
|
||||
default:
|
||||
slug = wiki.Slug(rp.Title)
|
||||
dir = "wiki/" + rp.Type
|
||||
}
|
||||
|
||||
path := dir + "/" + slug + ".md"
|
||||
fm := buildFrontmatter(rp, date)
|
||||
|
||||
return wiki.Page{
|
||||
Path: path,
|
||||
Content: fm + "\n" + rp.Content,
|
||||
}
|
||||
}
|
||||
|
||||
func buildFrontmatter(rp RawPage, date string) string {
|
||||
var sb strings.Builder
|
||||
sb.WriteString("---\n")
|
||||
fmt.Fprintf(&sb, "title: %s\n", rp.Title)
|
||||
|
||||
switch rp.Type {
|
||||
case "source":
|
||||
subtype := rp.Subtype
|
||||
if subtype == "" {
|
||||
subtype = "article"
|
||||
}
|
||||
fmt.Fprintf(&sb, "type: %s\n", subtype)
|
||||
if rp.Domain != "" {
|
||||
fmt.Fprintf(&sb, "domain: %s\n", rp.Domain)
|
||||
}
|
||||
fmt.Fprintf(&sb, "date_ingested: %s\n", date)
|
||||
fmt.Fprintf(&sb, "last_updated: %s\n", date)
|
||||
case "concept":
|
||||
if rp.Domain != "" {
|
||||
fmt.Fprintf(&sb, "domain: %s\n", rp.Domain)
|
||||
}
|
||||
fmt.Fprintf(&sb, "last_updated: %s\n", date)
|
||||
case "entity":
|
||||
if rp.Subtype != "" {
|
||||
fmt.Fprintf(&sb, "type: %s\n", rp.Subtype)
|
||||
}
|
||||
if rp.Domain != "" {
|
||||
fmt.Fprintf(&sb, "domain: %s\n", rp.Domain)
|
||||
}
|
||||
fmt.Fprintf(&sb, "last_updated: %s\n", date)
|
||||
default:
|
||||
if rp.Domain != "" {
|
||||
fmt.Fprintf(&sb, "domain: %s\n", rp.Domain)
|
||||
}
|
||||
fmt.Fprintf(&sb, "last_updated: %s\n", date)
|
||||
}
|
||||
|
||||
fmt.Fprintf(&sb, "aliases:\n - %s\n", rp.Title)
|
||||
sb.WriteString("---\n")
|
||||
return sb.String()
|
||||
}
|
||||
Reference in New Issue
Block a user