// ingestion/internal/pipeline/parse_test.go package pipeline import ( "testing" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) func TestParseRawPages_ValidJSON(t *testing.T) { input := `[{"title":"Shape Up","type":"source","subtype":"book","domain":"product-strategy","content":"## Summary\n\nFoo."},{"title":"Betting","type":"concept","content":"## Definition\n\nA technique."}]` pages, warnings := ParseRawPages(input) require.Len(t, pages, 2) assert.Empty(t, warnings) assert.Equal(t, "Shape Up", pages[0].Title) assert.Equal(t, "source", pages[0].Type) assert.Equal(t, "book", pages[0].Subtype) assert.Equal(t, "product-strategy", pages[0].Domain) assert.Equal(t, "Betting", pages[1].Title) assert.Equal(t, "concept", pages[1].Type) assert.Empty(t, pages[1].Subtype) } func TestParseRawPages_StripsFences(t *testing.T) { input := "```json\n[{\"title\":\"Foo\",\"type\":\"concept\",\"content\":\"## Definition\\n\\nFoo.\"}]\n```" pages, warnings := ParseRawPages(input) require.Len(t, pages, 1) assert.Empty(t, warnings) assert.Equal(t, "Foo", pages[0].Title) } func TestParseRawPages_TruncationRecovery(t *testing.T) { input := `[{"title":"Foo","type":"concept","content":"## Definition\n\nFoo."},{"title":"Bar","type":"concept","content":"trunc` pages, warnings := ParseRawPages(input) require.Len(t, pages, 1) assert.Equal(t, "Foo", pages[0].Title) assert.NotEmpty(t, warnings) } func TestParseRawPages_EmptyInput(t *testing.T) { pages, warnings := ParseRawPages("") assert.Empty(t, pages) assert.NotEmpty(t, warnings) } func TestParseRawPages_PlainFence(t *testing.T) { input := "```\n[{\"title\":\"Foo\",\"type\":\"concept\",\"content\":\"ok\"}]\n```" pages, warnings := ParseRawPages(input) require.Len(t, pages, 1) assert.Empty(t, warnings) } func TestParseRawPages_MissingTitle(t *testing.T) { // Missing title — still parsed, Title is empty string input := `[{"type":"concept","content":"## Definition\n\nFoo."}]` pages, warnings := ParseRawPages(input) require.Len(t, pages, 1) assert.Empty(t, warnings) assert.Empty(t, pages[0].Title) }