// ingestion/internal/pipeline/build.go
|
|
package pipeline
|
|
|
|
import (
|
|
"fmt"
|
|
"strings"
|
|
|
|
"github.com/mathiasbq/hyperguild/ingestion/internal/wiki"
|
|
)
|
|
|
|
// BuildPages converts RawPages from the LLM into wiki.Pages with computed slugs,
|
|
// paths, and YAML frontmatter. sourceSlug is the slug of the source being ingested
|
|
// (derived from the filename, not the LLM title).
|
|
func BuildPages(rawPages []RawPage, sourceSlug, date string) []wiki.Page {
|
|
out := make([]wiki.Page, 0, len(rawPages))
|
|
for _, rp := range rawPages {
|
|
out = append(out, buildPage(rp, sourceSlug, date))
|
|
}
|
|
return out
|
|
}
|
|
|
|
func buildPage(rp RawPage, sourceSlug, date string) wiki.Page {
|
|
var slug, dir string
|
|
switch rp.Type {
|
|
case "source":
|
|
slug = sourceSlug
|
|
dir = "wiki/sources"
|
|
case "concept":
|
|
slug = wiki.Slug(rp.Title)
|
|
dir = "wiki/concepts"
|
|
case "entity":
|
|
slug = wiki.Slug(rp.Title)
|
|
dir = "wiki/entities"
|
|
default:
|
|
slug = wiki.Slug(rp.Title)
|
|
dir = "wiki/" + rp.Type
|
|
}
|
|
|
|
path := dir + "/" + slug + ".md"
|
|
fm := buildFrontmatter(rp, date)
|
|
|
|
return wiki.Page{
|
|
Path: path,
|
|
Content: fm + "\n" + rp.Content,
|
|
}
|
|
}
|
|
|
|
func buildFrontmatter(rp RawPage, date string) string {
|
|
var sb strings.Builder
|
|
sb.WriteString("---\n")
|
|
fmt.Fprintf(&sb, "title: %s\n", yamlScalar(rp.Title))
|
|
|
|
switch rp.Type {
|
|
case "source":
|
|
subtype := rp.Subtype
|
|
if subtype == "" {
|
|
subtype = "article"
|
|
}
|
|
fmt.Fprintf(&sb, "type: %s\n", yamlScalar(subtype))
|
|
if rp.Domain != "" {
|
|
fmt.Fprintf(&sb, "domain: %s\n", yamlScalar(rp.Domain))
|
|
}
|
|
fmt.Fprintf(&sb, "date_ingested: %s\n", date)
|
|
fmt.Fprintf(&sb, "last_updated: %s\n", date)
|
|
case "concept":
|
|
if rp.Domain != "" {
|
|
fmt.Fprintf(&sb, "domain: %s\n", yamlScalar(rp.Domain))
|
|
}
|
|
fmt.Fprintf(&sb, "last_updated: %s\n", date)
|
|
case "entity":
|
|
if rp.Subtype != "" {
|
|
fmt.Fprintf(&sb, "type: %s\n", yamlScalar(rp.Subtype))
|
|
}
|
|
if rp.Domain != "" {
|
|
fmt.Fprintf(&sb, "domain: %s\n", yamlScalar(rp.Domain))
|
|
}
|
|
fmt.Fprintf(&sb, "last_updated: %s\n", date)
|
|
default:
|
|
if rp.Domain != "" {
|
|
fmt.Fprintf(&sb, "domain: %s\n", yamlScalar(rp.Domain))
|
|
}
|
|
fmt.Fprintf(&sb, "last_updated: %s\n", date)
|
|
}
|
|
|
|
fmt.Fprintf(&sb, "aliases:\n - %s\n", yamlScalar(rp.Title))
|
|
sb.WriteString("---\n")
|
|
return sb.String()
|
|
}
|
|
|
|
// yamlScalar renders s as a quoted YAML scalar that always stays on one line.
//
// Ordinary strings use the single-quoted style, where the only character that
// needs escaping is the quote itself (written doubled). That style has no way
// to escape line breaks or control characters: a newline in the input would
// spill the value onto a second line, and a "---" line inside it could end the
// frontmatter early. Strings containing such characters are therefore written
// in the double-quoted style with backslash escapes instead. Output for
// strings without those characters is unchanged.
func yamlScalar(s string) string {
	// Tab is legal inside a single-quoted scalar; every other C0 control,
	// DEL, and the Unicode line separators (NEL, LS, PS) must be escaped.
	needsEscape := func(r rune) bool {
		switch r {
		case '\t':
			return false
		case 0x7f, 0x85, 0x2028, 0x2029:
			return true
		}
		return r < 0x20
	}

	if strings.IndexFunc(s, needsEscape) >= 0 {
		// %q emits Go escapes (\n, \r, \t, \", \\, \xNN, \uNNNN, \UNNNNNNNN,
		// ...), all of which are also valid YAML double-quoted escapes.
		return fmt.Sprintf("%q", s)
	}
	return "'" + strings.ReplaceAll(s, "'", "''") + "'"
}
|