// ingestion/internal/pipeline/pipeline_test.go package pipeline import ( "context" "encoding/json" "net/http" "net/http/httptest" "os" "path/filepath" "testing" "time" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/mathiasbq/hyperguild/ingestion/internal/llm" "github.com/mathiasbq/hyperguild/ingestion/internal/wiki" ) func TestRun_WritesPages(t *testing.T) { brainDir := t.TempDir() for _, sub := range []string{"wiki/concepts", "wiki/entities", "wiki/sources"} { require.NoError(t, os.MkdirAll(filepath.Join(brainDir, sub), 0o755)) } llmResponse := mustJSON([]wiki.Page{ { Path: "wiki/sources/test-article.md", Content: "---\ntitle: Test Article\ntype: article\ndomain: software-engineering\ndate_ingested: 2026-04-22\nlast_updated: 2026-04-22\naliases:\n - Test Article\n---\n\n## Summary\n\nA test article.\n\n## Key Claims\n\n- It tests things.\n\n## Concepts Introduced or Reinforced\n\n## Entities Mentioned\n\n## Open Questions Raised\n", }, { Path: "wiki/concepts/testing.md", Content: "---\ntitle: Testing\ndomain: software-engineering\nlast_updated: 2026-04-22\naliases:\n - Testing\n---\n\n## Definition\n\nThe practice of verifying software.\n\n## Why It Matters\n\nCatches bugs.\n\n## Related Concepts\n\n## Related Entities\n\n## Sources\n\n## Evolving Notes\n", }, }) srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { w.Header().Set("Content-Type", "application/json") _ = json.NewEncoder(w).Encode(map[string]any{ "choices": []map[string]any{ {"message": map[string]any{"role": "assistant", "content": llmResponse}}, }, }) })) defer srv.Close() cfg := Config{ Complete: llm.New(srv.URL, "", "test-model", 30*time.Second).Complete, ChunkSize: 0, } result, err := Run(context.Background(), cfg, brainDir, "An article about testing.", "test-article", false) require.NoError(t, err) assert.Len(t, result.Pages, 2) assert.Empty(t, result.Warnings) _, err = os.Stat(filepath.Join(brainDir, "wiki", "sources", "test-article.md")) require.NoError(t, err) _, err = os.Stat(filepath.Join(brainDir, "wiki", "concepts", "testing.md")) require.NoError(t, err) _, err = os.Stat(filepath.Join(brainDir, "wiki", "index.md")) require.NoError(t, err) _, err = os.Stat(filepath.Join(brainDir, "log.md")) require.NoError(t, err) } func TestRun_DryRunDoesNotWrite(t *testing.T) { brainDir := t.TempDir() for _, sub := range []string{"wiki/concepts", "wiki/entities", "wiki/sources"} { require.NoError(t, os.MkdirAll(filepath.Join(brainDir, sub), 0o755)) } llmResponse := mustJSON([]wiki.Page{{ Path: "wiki/sources/foo.md", Content: "---\ntitle: Foo\n---\n\n## Summary\n\nFoo.\n", }}) srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { _ = json.NewEncoder(w).Encode(map[string]any{ "choices": []map[string]any{{"message": map[string]any{"content": llmResponse}}}, }) })) defer srv.Close() cfg := Config{Complete: llm.New(srv.URL, "", "m", 30*time.Second).Complete} result, err := Run(context.Background(), cfg, brainDir, "foo content", "foo", true) require.NoError(t, err) assert.Len(t, result.Pages, 1) _, err = os.Stat(filepath.Join(brainDir, "wiki", "sources", "foo.md")) assert.True(t, os.IsNotExist(err)) } func TestRun_MergesDuplicatePaths(t *testing.T) { brainDir := t.TempDir() for _, sub := range []string{"wiki/concepts", "wiki/entities", "wiki/sources"} { require.NoError(t, os.MkdirAll(filepath.Join(brainDir, sub), 0o755)) } // LLM returns same path twice (simulates multi-chunk merge) llmResponse := mustJSON([]wiki.Page{ {Path: "wiki/concepts/foo.md", Content: "---\ntitle: Foo\n---\n\n## Definition\n\nFirst.\n\n## Related Concepts\n\n- [[bar|Bar]]\n"}, {Path: "wiki/concepts/foo.md", Content: "---\ntitle: Foo\n---\n\n## Definition\n\nSecond.\n\n## Related Concepts\n\n- [[baz|Baz]]\n"}, }) srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { _ = json.NewEncoder(w).Encode(map[string]any{ "choices": []map[string]any{{"message": map[string]any{"content": llmResponse}}}, }) })) defer srv.Close() cfg := Config{Complete: llm.New(srv.URL, "", "m", 30*time.Second).Complete} result, err := Run(context.Background(), cfg, brainDir, "content", "foo", false) require.NoError(t, err) assert.Len(t, result.Pages, 1) // deduplicated content, err := os.ReadFile(filepath.Join(brainDir, "wiki", "concepts", "foo.md")) require.NoError(t, err) // keep-first for Definition, union for Related Concepts assert.Contains(t, string(content), "First.") assert.Contains(t, string(content), "[[bar|Bar]]") assert.Contains(t, string(content), "[[baz|Baz]]") } func mustJSON(v any) string { b, err := json.Marshal(v) if err != nil { panic(err) } return string(b) }