fix(pipeline): quote YAML scalar fields in buildFrontmatter to prevent injection

This commit is contained in:
Mathias Bergqvist
2026-04-23 18:56:39 +02:00
parent 7b57051af8
commit a7b363d589
2 changed files with 45 additions and 20 deletions

View File

@@ -48,7 +48,7 @@ func buildPage(rp RawPage, sourceSlug, date string) wiki.Page {
func buildFrontmatter(rp RawPage, date string) string { func buildFrontmatter(rp RawPage, date string) string {
var sb strings.Builder var sb strings.Builder
sb.WriteString("---\n") sb.WriteString("---\n")
fmt.Fprintf(&sb, "title: %s\n", rp.Title) fmt.Fprintf(&sb, "title: %s\n", yamlScalar(rp.Title))
switch rp.Type { switch rp.Type {
case "source": case "source":
@@ -56,33 +56,37 @@ func buildFrontmatter(rp RawPage, date string) string {
if subtype == "" { if subtype == "" {
subtype = "article" subtype = "article"
} }
fmt.Fprintf(&sb, "type: %s\n", subtype) fmt.Fprintf(&sb, "type: %s\n", yamlScalar(subtype))
if rp.Domain != "" { if rp.Domain != "" {
fmt.Fprintf(&sb, "domain: %s\n", rp.Domain) fmt.Fprintf(&sb, "domain: %s\n", yamlScalar(rp.Domain))
} }
fmt.Fprintf(&sb, "date_ingested: %s\n", date) fmt.Fprintf(&sb, "date_ingested: %s\n", date)
fmt.Fprintf(&sb, "last_updated: %s\n", date) fmt.Fprintf(&sb, "last_updated: %s\n", date)
case "concept": case "concept":
if rp.Domain != "" { if rp.Domain != "" {
fmt.Fprintf(&sb, "domain: %s\n", rp.Domain) fmt.Fprintf(&sb, "domain: %s\n", yamlScalar(rp.Domain))
} }
fmt.Fprintf(&sb, "last_updated: %s\n", date) fmt.Fprintf(&sb, "last_updated: %s\n", date)
case "entity": case "entity":
if rp.Subtype != "" { if rp.Subtype != "" {
fmt.Fprintf(&sb, "type: %s\n", rp.Subtype) fmt.Fprintf(&sb, "type: %s\n", yamlScalar(rp.Subtype))
} }
if rp.Domain != "" { if rp.Domain != "" {
fmt.Fprintf(&sb, "domain: %s\n", rp.Domain) fmt.Fprintf(&sb, "domain: %s\n", yamlScalar(rp.Domain))
} }
fmt.Fprintf(&sb, "last_updated: %s\n", date) fmt.Fprintf(&sb, "last_updated: %s\n", date)
default: default:
if rp.Domain != "" { if rp.Domain != "" {
fmt.Fprintf(&sb, "domain: %s\n", rp.Domain) fmt.Fprintf(&sb, "domain: %s\n", yamlScalar(rp.Domain))
} }
fmt.Fprintf(&sb, "last_updated: %s\n", date) fmt.Fprintf(&sb, "last_updated: %s\n", date)
} }
fmt.Fprintf(&sb, "aliases:\n - %s\n", rp.Title) fmt.Fprintf(&sb, "aliases:\n - %s\n", yamlScalar(rp.Title))
sb.WriteString("---\n") sb.WriteString("---\n")
return sb.String() return sb.String()
} }
// yamlScalar renders s as a quoted YAML flow scalar that always occupies
// exactly one physical line, so a field value can never spill into the
// surrounding frontmatter.
//
// Ordinary strings are emitted single-quoted with embedded single quotes
// doubled, e.g. it's -> 'it''s'. Single-quoted scalars have no escape
// mechanism, so a string containing a line break or another control
// character is instead emitted double-quoted with backslash escapes.
// Writing such a string raw would let the value continue on new lines
// (for example a "---" line or an extra "key: value" line) and the line
// break itself would be folded by a YAML parser.
func yamlScalar(s string) string {
	if strings.IndexFunc(s, yamlMustEscape) < 0 {
		return "'" + strings.ReplaceAll(s, "'", "''") + "'"
	}

	var sb strings.Builder
	sb.Grow(len(s) + 2)
	sb.WriteByte('"')
	for _, r := range s {
		switch r {
		case '"':
			sb.WriteString(`\"`)
		case '\\':
			sb.WriteString(`\\`)
		case '\n':
			sb.WriteString(`\n`)
		case '\r':
			sb.WriteString(`\r`)
		case '\t':
			sb.WriteString(`\t`)
		case 0x85: // NEL, a line break in YAML 1.1
			sb.WriteString(`\N`)
		case 0x2028: // Unicode line separator
			sb.WriteString(`\L`)
		case 0x2029: // Unicode paragraph separator
			sb.WriteString(`\P`)
		default:
			if yamlMustEscape(r) {
				// Remaining C0/C1 controls and DEL all fit in two hex digits.
				fmt.Fprintf(&sb, `\x%02X`, r)
			} else {
				sb.WriteRune(r)
			}
		}
	}
	sb.WriteByte('"')
	return sb.String()
}

// yamlMustEscape reports whether r cannot be written literally inside a
// single-line quoted scalar: control characters other than tab, and the
// Unicode line and paragraph separators.
func yamlMustEscape(r rune) bool {
	switch {
	case r == '\t':
		return false
	case r < 0x20, r >= 0x7f && r <= 0x9f:
		return true
	case r == 0x2028, r == 0x2029:
		return true
	}
	return false
}

View File

@@ -24,12 +24,12 @@ func TestBuildPages_SourcePage(t *testing.T) {
p := pages[0] p := pages[0]
assert.Equal(t, "wiki/sources/shape-up.md", p.Path) assert.Equal(t, "wiki/sources/shape-up.md", p.Path)
assert.Contains(t, p.Content, "title: Shape Up") assert.Contains(t, p.Content, "title: 'Shape Up'")
assert.Contains(t, p.Content, "type: book") assert.Contains(t, p.Content, "type: 'book'")
assert.Contains(t, p.Content, "domain: product-strategy") assert.Contains(t, p.Content, "domain: 'product-strategy'")
assert.Contains(t, p.Content, "date_ingested: 2026-04-23") assert.Contains(t, p.Content, "date_ingested: 2026-04-23")
assert.Contains(t, p.Content, "last_updated: 2026-04-23") assert.Contains(t, p.Content, "last_updated: 2026-04-23")
assert.Contains(t, p.Content, "aliases:\n - Shape Up") assert.Contains(t, p.Content, "aliases:\n - 'Shape Up'")
assert.Contains(t, p.Content, "## Summary") assert.Contains(t, p.Content, "## Summary")
assert.True(t, strings.HasPrefix(p.Content, "---\n"), "content must start with frontmatter") assert.True(t, strings.HasPrefix(p.Content, "---\n"), "content must start with frontmatter")
} }
@@ -48,10 +48,10 @@ func TestBuildPages_ConceptPage(t *testing.T) {
p := pages[0] p := pages[0]
assert.Equal(t, "wiki/concepts/betting.md", p.Path) assert.Equal(t, "wiki/concepts/betting.md", p.Path)
assert.Contains(t, p.Content, "title: Betting") assert.Contains(t, p.Content, "title: 'Betting'")
assert.Contains(t, p.Content, "domain: product-strategy") assert.Contains(t, p.Content, "domain: 'product-strategy'")
assert.Contains(t, p.Content, "last_updated: 2026-04-23") assert.Contains(t, p.Content, "last_updated: 2026-04-23")
assert.Contains(t, p.Content, "aliases:\n - Betting") assert.Contains(t, p.Content, "aliases:\n - 'Betting'")
assert.NotContains(t, p.Content, "date_ingested") assert.NotContains(t, p.Content, "date_ingested")
assert.Contains(t, p.Content, "## Definition") assert.Contains(t, p.Content, "## Definition")
} }
@@ -71,11 +71,11 @@ func TestBuildPages_EntityPage(t *testing.T) {
p := pages[0] p := pages[0]
assert.Equal(t, "wiki/entities/ryan-singer.md", p.Path) assert.Equal(t, "wiki/entities/ryan-singer.md", p.Path)
assert.Contains(t, p.Content, "title: Ryan Singer") assert.Contains(t, p.Content, "title: 'Ryan Singer'")
assert.Contains(t, p.Content, "type: person") assert.Contains(t, p.Content, "type: 'person'")
assert.Contains(t, p.Content, "domain: product-strategy") assert.Contains(t, p.Content, "domain: 'product-strategy'")
assert.Contains(t, p.Content, "last_updated: 2026-04-23") assert.Contains(t, p.Content, "last_updated: 2026-04-23")
assert.Contains(t, p.Content, "aliases:\n - Ryan Singer") assert.Contains(t, p.Content, "aliases:\n - 'Ryan Singer'")
assert.NotContains(t, p.Content, "date_ingested") assert.NotContains(t, p.Content, "date_ingested")
} }
@@ -105,7 +105,7 @@ func TestBuildPages_SourceDefaultSubtype(t *testing.T) {
} }
pages := BuildPages(raw, "some-post", "2026-04-23") pages := BuildPages(raw, "some-post", "2026-04-23")
require.Len(t, pages, 1) require.Len(t, pages, 1)
assert.Contains(t, pages[0].Content, "type: article") assert.Contains(t, pages[0].Content, "type: 'article'")
} }
func TestBuildPages_OmitsDomainWhenEmpty(t *testing.T) { func TestBuildPages_OmitsDomainWhenEmpty(t *testing.T) {
@@ -129,3 +129,24 @@ func TestBuildPages_MultiplePages(t *testing.T) {
assert.Equal(t, "wiki/concepts/betting.md", pages[1].Path) assert.Equal(t, "wiki/concepts/betting.md", pages[1].Path)
assert.Equal(t, "wiki/entities/ryan-singer.md", pages[2].Path) assert.Equal(t, "wiki/entities/ryan-singer.md", pages[2].Path)
} }
// TestBuildPages_TitleWithColon checks that a title containing a colon is
// written as a quoted scalar in both the title field and the aliases list.
func TestBuildPages_TitleWithColon(t *testing.T) {
	input := []RawPage{
		{
			Title:   "Shape Up: The Basecamp Method",
			Type:    "source",
			Subtype: "book",
			Content: "## Summary\n\nA book.\n",
		},
	}

	built := BuildPages(input, "shape-up", "2026-04-23")
	require.Len(t, built, 1)

	// The colon-bearing title has to appear inside quotes in the YAML output.
	content := built[0].Content
	assert.Contains(t, content, "title: 'Shape Up: The Basecamp Method'")
	assert.Contains(t, content, "aliases:\n - 'Shape Up: The Basecamp Method'")
}
// TestBuildPages_EntityNoSubtype checks that an entity page without a
// subtype produces no "type:" line while the title is still emitted quoted.
func TestBuildPages_EntityNoSubtype(t *testing.T) {
	input := []RawPage{
		{
			Title:   "Basecamp",
			Type:    "entity",
			Content: "## Description\n\nA company.\n",
		},
	}

	built := BuildPages(input, "src", "2026-04-23")
	require.Len(t, built, 1)

	content := built[0].Content
	assert.NotContains(t, content, "type:")
	assert.Contains(t, content, "title: 'Basecamp'")
}