fix(pipeline): skip RawPages with empty title in BuildPages instead of producing broken paths
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -10,13 +10,27 @@ import (
|
||||
|
||||
// BuildPages converts RawPages from the LLM into wiki.Pages with computed slugs,
|
||||
// paths, and YAML frontmatter. sourceSlug is the slug of the source being ingested
|
||||
// (derived from the filename, not the LLM title).
|
||||
func BuildPages(rawPages []RawPage, sourceSlug, date string) []wiki.Page {
|
||||
// (derived from the filename, not the LLM title). Pages whose title resolves to an
|
||||
// empty slug are skipped and returned as warnings instead.
|
||||
func BuildPages(rawPages []RawPage, sourceSlug, date string) ([]wiki.Page, []string) {
|
||||
out := make([]wiki.Page, 0, len(rawPages))
|
||||
var warnings []string
|
||||
for _, rp := range rawPages {
|
||||
slug := computeSlug(rp, sourceSlug)
|
||||
if slug == "" {
|
||||
warnings = append(warnings, fmt.Sprintf("skipped page with empty title (type: %s)", rp.Type))
|
||||
continue
|
||||
}
|
||||
out = append(out, buildPage(rp, sourceSlug, date))
|
||||
}
|
||||
return out
|
||||
return out, warnings
|
||||
}
|
||||
|
||||
func computeSlug(rp RawPage, sourceSlug string) string {
|
||||
if rp.Type == "source" {
|
||||
return sourceSlug
|
||||
}
|
||||
return wiki.Slug(rp.Title)
|
||||
}
|
||||
|
||||
func buildPage(rp RawPage, sourceSlug, date string) wiki.Page {
|
||||
|
||||
@@ -19,8 +19,9 @@ func TestBuildPages_SourcePage(t *testing.T) {
|
||||
Content: "## Summary\n\nA book about shaping product work.\n",
|
||||
},
|
||||
}
|
||||
pages := BuildPages(raw, "shape-up", "2026-04-23")
|
||||
pages, warnings := BuildPages(raw, "shape-up", "2026-04-23")
|
||||
require.Len(t, pages, 1)
|
||||
assert.Empty(t, warnings)
|
||||
|
||||
p := pages[0]
|
||||
assert.Equal(t, "wiki/sources/shape-up.md", p.Path)
|
||||
@@ -43,8 +44,9 @@ func TestBuildPages_ConceptPage(t *testing.T) {
|
||||
Content: "## Definition\n\nA resource allocation technique.\n",
|
||||
},
|
||||
}
|
||||
pages := BuildPages(raw, "shape-up", "2026-04-23")
|
||||
pages, warnings := BuildPages(raw, "shape-up", "2026-04-23")
|
||||
require.Len(t, pages, 1)
|
||||
assert.Empty(t, warnings)
|
||||
|
||||
p := pages[0]
|
||||
assert.Equal(t, "wiki/concepts/betting.md", p.Path)
|
||||
@@ -66,8 +68,9 @@ func TestBuildPages_EntityPage(t *testing.T) {
|
||||
Content: "## Description\n\nA product designer.\n",
|
||||
},
|
||||
}
|
||||
pages := BuildPages(raw, "shape-up", "2026-04-23")
|
||||
pages, warnings := BuildPages(raw, "shape-up", "2026-04-23")
|
||||
require.Len(t, pages, 1)
|
||||
assert.Empty(t, warnings)
|
||||
|
||||
p := pages[0]
|
||||
assert.Equal(t, "wiki/entities/ryan-singer.md", p.Path)
|
||||
@@ -84,7 +87,7 @@ func TestBuildPages_SourceSlugUsedForSourcePage(t *testing.T) {
|
||||
raw := []RawPage{
|
||||
{Title: "FinBERT: A Pretrained Model", Type: "source", Subtype: "article", Content: "## Summary\n\nA model.\n"},
|
||||
}
|
||||
pages := BuildPages(raw, "finbert-huggingface", "2026-04-23")
|
||||
pages, _ := BuildPages(raw, "finbert-huggingface", "2026-04-23")
|
||||
require.Len(t, pages, 1)
|
||||
assert.Equal(t, "wiki/sources/finbert-huggingface.md", pages[0].Path)
|
||||
}
|
||||
@@ -93,7 +96,7 @@ func TestBuildPages_ConceptSlugDerivedFromTitle(t *testing.T) {
|
||||
raw := []RawPage{
|
||||
{Title: "Domain-Driven Design", Type: "concept", Content: "## Definition\n\nFoo.\n"},
|
||||
}
|
||||
pages := BuildPages(raw, "some-source", "2026-04-23")
|
||||
pages, _ := BuildPages(raw, "some-source", "2026-04-23")
|
||||
require.Len(t, pages, 1)
|
||||
assert.Equal(t, "wiki/concepts/domain-driven-design.md", pages[0].Path)
|
||||
}
|
||||
@@ -103,7 +106,7 @@ func TestBuildPages_SourceDefaultSubtype(t *testing.T) {
|
||||
raw := []RawPage{
|
||||
{Title: "Some Post", Type: "source", Content: "## Summary\n\nA post.\n"},
|
||||
}
|
||||
pages := BuildPages(raw, "some-post", "2026-04-23")
|
||||
pages, _ := BuildPages(raw, "some-post", "2026-04-23")
|
||||
require.Len(t, pages, 1)
|
||||
assert.Contains(t, pages[0].Content, "type: 'article'")
|
||||
}
|
||||
@@ -112,7 +115,7 @@ func TestBuildPages_OmitsDomainWhenEmpty(t *testing.T) {
|
||||
raw := []RawPage{
|
||||
{Title: "Betting", Type: "concept", Content: "## Definition\n\nFoo.\n"},
|
||||
}
|
||||
pages := BuildPages(raw, "src", "2026-04-23")
|
||||
pages, _ := BuildPages(raw, "src", "2026-04-23")
|
||||
require.Len(t, pages, 1)
|
||||
assert.NotContains(t, pages[0].Content, "domain:")
|
||||
}
|
||||
@@ -123,7 +126,7 @@ func TestBuildPages_MultiplePages(t *testing.T) {
|
||||
{Title: "Betting", Type: "concept", Content: "## Definition\n\nA technique.\n"},
|
||||
{Title: "Ryan Singer", Type: "entity", Subtype: "person", Content: "## Description\n\nA designer.\n"},
|
||||
}
|
||||
pages := BuildPages(raw, "shape-up", "2026-04-23")
|
||||
pages, _ := BuildPages(raw, "shape-up", "2026-04-23")
|
||||
require.Len(t, pages, 3)
|
||||
assert.Equal(t, "wiki/sources/shape-up.md", pages[0].Path)
|
||||
assert.Equal(t, "wiki/concepts/betting.md", pages[1].Path)
|
||||
@@ -134,7 +137,7 @@ func TestBuildPages_TitleWithColon(t *testing.T) {
|
||||
raw := []RawPage{
|
||||
{Title: "Shape Up: The Basecamp Method", Type: "source", Subtype: "book", Content: "## Summary\n\nA book.\n"},
|
||||
}
|
||||
pages := BuildPages(raw, "shape-up", "2026-04-23")
|
||||
pages, _ := BuildPages(raw, "shape-up", "2026-04-23")
|
||||
require.Len(t, pages, 1)
|
||||
// Title with colon must be quoted in YAML
|
||||
assert.Contains(t, pages[0].Content, "title: 'Shape Up: The Basecamp Method'")
|
||||
@@ -145,8 +148,20 @@ func TestBuildPages_EntityNoSubtype(t *testing.T) {
|
||||
raw := []RawPage{
|
||||
{Title: "Basecamp", Type: "entity", Content: "## Description\n\nA company.\n"},
|
||||
}
|
||||
pages := BuildPages(raw, "src", "2026-04-23")
|
||||
pages, _ := BuildPages(raw, "src", "2026-04-23")
|
||||
require.Len(t, pages, 1)
|
||||
assert.NotContains(t, pages[0].Content, "type:")
|
||||
assert.Contains(t, pages[0].Content, "title: 'Basecamp'")
|
||||
}
|
||||
|
||||
func TestBuildPages_EmptyTitleSkippedWithWarning(t *testing.T) {
|
||||
raw := []RawPage{
|
||||
{Title: "", Type: "concept", Content: "## Definition\n\nFoo.\n"},
|
||||
{Title: "Betting", Type: "concept", Content: "## Definition\n\nA technique.\n"},
|
||||
}
|
||||
pages, warnings := BuildPages(raw, "src", "2026-04-23")
|
||||
require.Len(t, pages, 1, "empty-title page should be skipped")
|
||||
assert.Equal(t, "wiki/concepts/betting.md", pages[0].Path)
|
||||
assert.Len(t, warnings, 1)
|
||||
assert.Contains(t, warnings[0], "empty title")
|
||||
}
|
||||
|
||||
@@ -59,7 +59,8 @@ func Run(ctx context.Context, cfg Config, brainDir, content, source string, dryR
|
||||
allWarnings = append(allWarnings, warnings...)
|
||||
}
|
||||
|
||||
pages := BuildPages(allRaw, sourceSlug, date)
|
||||
pages, buildWarnings := BuildPages(allRaw, sourceSlug, date)
|
||||
allWarnings = append(allWarnings, buildWarnings...)
|
||||
resolved := Resolve(pages, inventory)
|
||||
canonicalized, linkWarnings := CanonicalizeLinks(resolved, inventory)
|
||||
allWarnings = append(allWarnings, linkWarnings...)
|
||||
|
||||
Reference in New Issue
Block a user