Files
hyperguild/ingestion/internal/graph/extract_test.go
Mathias f53ee18cb6
All checks were successful
CI / Lint / Test / Vet (push) Successful in 12s
CI / Mirror to GitHub (push) Successful in 3s
feat(graph): add brain_entities + brain_edges store and wikilink parser
Foundation for Track A (GraphRAG on top of existing wiki). Two new
packages, both unwired — service behaviour unchanged until commit 2
hooks the pipeline.

- internal/graph: pure parser. Extract() walks markdown + frontmatter
  and emits one Entity + N wikilink Edges per doc. Dedupes per (dst,
  line), ignores self-references, classifies hall/concept/entity/
  source/knowledge from path layout.

- internal/graphstore: pgx-backed PGStore mirroring vectorstore's
  shape. Idempotent Init() creates brain_entities + brain_edges with
  indexes on src_slug, dst_slug, src_doc, wing, type. Operations:
  UpsertEntity, ReplaceEdgesForDoc (tx), DeleteByDoc, Neighbors,
  Subgraph (recursive CTE, depth ≤6), Path (shortest path, depth ≤8).

Schema lives on the shared postgres18 instance alongside the
brain_embeddings table — no new datastore. See
docs/superpowers/specs/2026-05-homelab-training-graph-next-step.md
in infra repo + infra#62.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-23 15:18:08 +02:00

107 lines
3.0 KiB
Go

package graph
import (
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestExtract_HallDoc runs Extract on a document stored under
// wiki/jepa-fx/decisions/ whose frontmatter sets wing, hall and title.
// It expects a "hall" entity carrying those values and one wikilink edge
// per [[...]] link, in document order.
func TestExtract_HallDoc(t *testing.T) {
	const docPath = "wiki/jepa-fx/decisions/val-vol.md"

	doc := []byte(`---
wing: jepa-fx
hall: decisions
title: Val Vol Decision
---
# Val Vol
See also [[other-decision]] and [[parent-concept|Parent Concept]].
Linking to [[unrelated]].
`)

	entity, links, ok := Extract(docPath, doc)
	require.True(t, ok)

	// Entity metadata.
	assert.Equal(t, "val-vol", entity.Slug)
	assert.Equal(t, "hall", entity.Type)
	assert.Equal(t, "jepa-fx", entity.Wing)
	assert.Equal(t, "decisions", entity.Hall)
	assert.Equal(t, "Val Vol Decision", entity.Title)

	// Destination slugs in order; the aliased link
	// [[parent-concept|Parent Concept]] must yield the part before the pipe.
	wantDst := []string{"other-decision", "parent-concept", "unrelated"}
	require.Len(t, links, len(wantDst))
	for i, want := range wantDst {
		assert.Equal(t, want, links[i].DstSlug)
	}

	// Every edge shares the same type and source, and has a positive line.
	for i := range links {
		assert.Equal(t, "wikilink", links[i].EdgeType)
		assert.Equal(t, "val-vol", links[i].SrcSlug)
		assert.Equal(t, docPath, links[i].SrcDoc)
		assert.Greater(t, links[i].SrcLine, 0)
	}
}
// TestExtract_LegacyConceptDoc runs Extract on a document under
// wiki/concepts/ whose frontmatter only sets a title. It expects a
// "concept" entity with empty Wing and Hall, plus a single edge whose
// destination is the slug part of the aliased link.
func TestExtract_LegacyConceptDoc(t *testing.T) {
	const docPath = "wiki/concepts/hash-encoding.md"

	doc := []byte(`---
title: Hash Encoding
---
# Hash Encoding
Linked to [[financial-sentiment-analysis|FSA]].
`)

	entity, links, ok := Extract(docPath, doc)
	require.True(t, ok)

	assert.Equal(t, "hash-encoding", entity.Slug)
	assert.Equal(t, "concept", entity.Type)
	assert.Empty(t, entity.Wing)
	assert.Empty(t, entity.Hall)
	assert.Equal(t, "Hash Encoding", entity.Title)

	require.Len(t, links, 1)
	onlyLink := links[0]
	assert.Equal(t, "financial-sentiment-analysis", onlyLink.DstSlug)
}
// TestExtract_KnowledgeDoc runs Extract on a document under knowledge/
// that has neither frontmatter nor wikilinks. It expects a "knowledge"
// entity and no edges.
func TestExtract_KnowledgeDoc(t *testing.T) {
	const docPath = "knowledge/some-note.md"
	doc := []byte("# No frontmatter, no links here.\n")

	entity, links, ok := Extract(docPath, doc)
	require.True(t, ok)

	assert.Equal(t, "some-note", entity.Slug)
	assert.Equal(t, "knowledge", entity.Type)
	assert.Empty(t, links)
}
// TestExtract_DedupesRepeatedLinkOnSameLine checks that two [[foo]] links
// on one line produce a single edge to "foo".
func TestExtract_DedupesRepeatedLinkOnSameLine(t *testing.T) {
	const docPath = "knowledge/dup.md"
	doc := []byte("See [[foo]] and [[foo]] again on the same line.\n")

	_, links, ok := Extract(docPath, doc)
	require.True(t, ok)

	require.Len(t, links, 1)
	onlyLink := links[0]
	assert.Equal(t, "foo", onlyLink.DstSlug)
}
// TestExtract_KeepsMultipleEdgesOnDifferentLines checks that the same
// [[foo]] link on two separate lines yields two edges with different
// SrcLine values.
func TestExtract_KeepsMultipleEdgesOnDifferentLines(t *testing.T) {
	const docPath = "knowledge/multi.md"
	doc := []byte("First mention [[foo]].\n\nSecond mention [[foo]].\n")

	_, links, ok := Extract(docPath, doc)
	require.True(t, ok)

	require.Len(t, links, 2)
	firstLine, secondLine := links[0].SrcLine, links[1].SrcLine
	assert.NotEqual(t, firstLine, secondLine)
}
// TestExtract_IgnoresSelfLinks checks that a [[self]] link inside
// knowledge/self.md produces no edges.
func TestExtract_IgnoresSelfLinks(t *testing.T) {
	const docPath = "knowledge/self.md"
	doc := []byte("Self-reference [[self]] should be ignored.\n")

	_, links, ok := Extract(docPath, doc)
	require.True(t, ok)

	assert.Empty(t, links)
}
// TestExtract_RejectsNonMarkdown checks that Extract reports ok == false
// for a path ending in .txt.
func TestExtract_RejectsNonMarkdown(t *testing.T) {
	const docPath = "wiki/concepts/not-markdown.txt"
	doc := []byte("anything")

	_, _, ok := Extract(docPath, doc)

	assert.False(t, ok)
}
// TestExtract_LineNumbersAre1Indexed checks that a link on the second
// line of the input is reported with SrcLine == 2.
func TestExtract_LineNumbersAre1Indexed(t *testing.T) {
	const docPath = "knowledge/lines.md"
	doc := []byte("line 1\nline 2 [[bar]]\n")

	_, links, ok := Extract(docPath, doc)
	require.True(t, ok)

	require.Len(t, links, 1)
	gotLine := links[0].SrcLine
	assert.Equal(t, 2, gotLine)
}