fix(pipeline): quote YAML scalar fields in buildFrontmatter to prevent injection

This commit is contained in:
Mathias Bergqvist
2026-04-23 18:56:39 +02:00
parent 7b57051af8
commit a7b363d589
2 changed files with 45 additions and 20 deletions

View File

@@ -48,7 +48,7 @@ func buildPage(rp RawPage, sourceSlug, date string) wiki.Page {
func buildFrontmatter(rp RawPage, date string) string {
var sb strings.Builder
sb.WriteString("---\n")
fmt.Fprintf(&sb, "title: %s\n", rp.Title)
fmt.Fprintf(&sb, "title: %s\n", yamlScalar(rp.Title))
switch rp.Type {
case "source":
@@ -56,33 +56,37 @@ func buildFrontmatter(rp RawPage, date string) string {
if subtype == "" {
subtype = "article"
}
fmt.Fprintf(&sb, "type: %s\n", subtype)
fmt.Fprintf(&sb, "type: %s\n", yamlScalar(subtype))
if rp.Domain != "" {
fmt.Fprintf(&sb, "domain: %s\n", rp.Domain)
fmt.Fprintf(&sb, "domain: %s\n", yamlScalar(rp.Domain))
}
fmt.Fprintf(&sb, "date_ingested: %s\n", date)
fmt.Fprintf(&sb, "last_updated: %s\n", date)
case "concept":
if rp.Domain != "" {
fmt.Fprintf(&sb, "domain: %s\n", rp.Domain)
fmt.Fprintf(&sb, "domain: %s\n", yamlScalar(rp.Domain))
}
fmt.Fprintf(&sb, "last_updated: %s\n", date)
case "entity":
if rp.Subtype != "" {
fmt.Fprintf(&sb, "type: %s\n", rp.Subtype)
fmt.Fprintf(&sb, "type: %s\n", yamlScalar(rp.Subtype))
}
if rp.Domain != "" {
fmt.Fprintf(&sb, "domain: %s\n", rp.Domain)
fmt.Fprintf(&sb, "domain: %s\n", yamlScalar(rp.Domain))
}
fmt.Fprintf(&sb, "last_updated: %s\n", date)
default:
if rp.Domain != "" {
fmt.Fprintf(&sb, "domain: %s\n", rp.Domain)
fmt.Fprintf(&sb, "domain: %s\n", yamlScalar(rp.Domain))
}
fmt.Fprintf(&sb, "last_updated: %s\n", date)
}
fmt.Fprintf(&sb, "aliases:\n - %s\n", rp.Title)
fmt.Fprintf(&sb, "aliases:\n - %s\n", yamlScalar(rp.Title))
sb.WriteString("---\n")
return sb.String()
}
// yamlScalar renders s as a quoted YAML scalar that can be written directly
// after a "key: " prefix or a "- " sequence marker.
//
// Ordinary text is emitted in single-quoted style, where the only character
// that needs escaping is the single quote itself (written doubled).
//
// Single-quoted scalars have no escape mechanism for line breaks or control
// characters, so a value containing e.g. "\n---\n" would still be written
// raw and could end the frontmatter block or introduce extra keys. Such
// values are emitted in double-quoted style instead, with the offending
// characters written as backslash escapes so the result stays on one line.
func yamlScalar(s string) string {
	needsEscapes := strings.IndexFunc(s, func(r rune) bool {
		switch {
		case r < 0x20:
			// C0 control characters, including tab, LF and CR.
			return true
		case r >= 0x7f && r <= 0x9f:
			// DEL and the C1 control range (which contains NEL, U+0085).
			return true
		case r == 0x2028, r == 0x2029:
			// Unicode line and paragraph separators.
			return true
		case r == '\uFFFD':
			// IndexFunc reports invalid UTF-8 bytes as U+FFFD; route them
			// through the escaping path so no raw invalid byte is emitted.
			return true
		}
		return false
	}) >= 0

	if needsEscapes {
		// %q produces a double-quoted string using backslash escapes
		// (\n, \t, \", \\, \xNN, \uNNNN, \UNNNNNNNN), all of which are also
		// valid escapes inside a YAML double-quoted scalar.
		return fmt.Sprintf("%q", s)
	}
	return "'" + strings.ReplaceAll(s, "'", "''") + "'"
}

View File

@@ -24,12 +24,12 @@ func TestBuildPages_SourcePage(t *testing.T) {
p := pages[0]
assert.Equal(t, "wiki/sources/shape-up.md", p.Path)
assert.Contains(t, p.Content, "title: Shape Up")
assert.Contains(t, p.Content, "type: book")
assert.Contains(t, p.Content, "domain: product-strategy")
assert.Contains(t, p.Content, "title: 'Shape Up'")
assert.Contains(t, p.Content, "type: 'book'")
assert.Contains(t, p.Content, "domain: 'product-strategy'")
assert.Contains(t, p.Content, "date_ingested: 2026-04-23")
assert.Contains(t, p.Content, "last_updated: 2026-04-23")
assert.Contains(t, p.Content, "aliases:\n - Shape Up")
assert.Contains(t, p.Content, "aliases:\n - 'Shape Up'")
assert.Contains(t, p.Content, "## Summary")
assert.True(t, strings.HasPrefix(p.Content, "---\n"), "content must start with frontmatter")
}
@@ -48,10 +48,10 @@ func TestBuildPages_ConceptPage(t *testing.T) {
p := pages[0]
assert.Equal(t, "wiki/concepts/betting.md", p.Path)
assert.Contains(t, p.Content, "title: Betting")
assert.Contains(t, p.Content, "domain: product-strategy")
assert.Contains(t, p.Content, "title: 'Betting'")
assert.Contains(t, p.Content, "domain: 'product-strategy'")
assert.Contains(t, p.Content, "last_updated: 2026-04-23")
assert.Contains(t, p.Content, "aliases:\n - Betting")
assert.Contains(t, p.Content, "aliases:\n - 'Betting'")
assert.NotContains(t, p.Content, "date_ingested")
assert.Contains(t, p.Content, "## Definition")
}
@@ -71,11 +71,11 @@ func TestBuildPages_EntityPage(t *testing.T) {
p := pages[0]
assert.Equal(t, "wiki/entities/ryan-singer.md", p.Path)
assert.Contains(t, p.Content, "title: Ryan Singer")
assert.Contains(t, p.Content, "type: person")
assert.Contains(t, p.Content, "domain: product-strategy")
assert.Contains(t, p.Content, "title: 'Ryan Singer'")
assert.Contains(t, p.Content, "type: 'person'")
assert.Contains(t, p.Content, "domain: 'product-strategy'")
assert.Contains(t, p.Content, "last_updated: 2026-04-23")
assert.Contains(t, p.Content, "aliases:\n - Ryan Singer")
assert.Contains(t, p.Content, "aliases:\n - 'Ryan Singer'")
assert.NotContains(t, p.Content, "date_ingested")
}
@@ -105,7 +105,7 @@ func TestBuildPages_SourceDefaultSubtype(t *testing.T) {
}
pages := BuildPages(raw, "some-post", "2026-04-23")
require.Len(t, pages, 1)
assert.Contains(t, pages[0].Content, "type: article")
assert.Contains(t, pages[0].Content, "type: 'article'")
}
func TestBuildPages_OmitsDomainWhenEmpty(t *testing.T) {
@@ -129,3 +129,24 @@ func TestBuildPages_MultiplePages(t *testing.T) {
assert.Equal(t, "wiki/concepts/betting.md", pages[1].Path)
assert.Equal(t, "wiki/entities/ryan-singer.md", pages[2].Path)
}
// TestBuildPages_TitleWithColon verifies that a title containing a colon is
// written as a single-quoted scalar in both the title and aliases entries of
// the generated page content.
func TestBuildPages_TitleWithColon(t *testing.T) {
	input := []RawPage{{
		Title:   "Shape Up: The Basecamp Method",
		Type:    "source",
		Subtype: "book",
		Content: "## Summary\n\nA book.\n",
	}}

	built := BuildPages(input, "shape-up", "2026-04-23")
	require.Len(t, built, 1)

	// The colon must sit inside quotes in every place the title is emitted.
	content := built[0].Content
	assert.Contains(t, content, "title: 'Shape Up: The Basecamp Method'")
	assert.Contains(t, content, "aliases:\n - 'Shape Up: The Basecamp Method'")
}
// TestBuildPages_EntityNoSubtype verifies that an entity page without a
// subtype produces no "type:" line while still emitting its quoted title.
func TestBuildPages_EntityNoSubtype(t *testing.T) {
	input := []RawPage{{
		Title:   "Basecamp",
		Type:    "entity",
		Content: "## Description\n\nA company.\n",
	}}

	built := BuildPages(input, "src", "2026-04-23")
	require.Len(t, built, 1)

	content := built[0].Content
	assert.NotContains(t, content, "type:")
	assert.Contains(t, content, "title: 'Basecamp'")
}