Files
hyperguild/ingestion/internal/graph/extract_test.go
Mathias d5f112b600
All checks were successful
CI / Lint / Test / Vet (push) Successful in 13s
CI / Mirror to GitHub (push) Successful in 4s
feat(graph,graphstore): M2 parse tier+topic from frontmatter, persist via Upsert (infra#72)
extract.go now reads `tier:` and `topic:` from YAML frontmatter, with
a path-based fallback when frontmatter is absent (the pre-M3 state on
every existing entry):

  knowledge/* → tier=knowledge
  notes/*     → tier=note
  wiki/**     → tier=note   (sources + concepts + entities are I-level)
  inbox/**, raw/**, sessions/**, clips/** → tier=inbox

Frontmatter wins when present — covers the M3-migrated case where an
entry's path may not match the tier the author chose for it.

UpsertEntity persists both columns. M1's schema already has them.

Backfill on next pod start populates tier for the whole corpus
without any file moves; M3 will follow up with the actual layout
migration and explicit frontmatter writes.
2026-05-25 12:35:38 +02:00

180 lines
5.1 KiB
Go

package graph
import (
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestExtract_HallDoc feeds Extract a document stored at
// wiki/jepa-fx/decisions/ whose frontmatter names the same wing and hall.
// It expects a "hall" entity and one wikilink edge per link, in the order
// the links appear in the body.
func TestExtract_HallDoc(t *testing.T) {
	const docPath = "wiki/jepa-fx/decisions/val-vol.md"
	doc := []byte(`---
wing: jepa-fx
hall: decisions
title: Val Vol Decision
---
# Val Vol
See also [[other-decision]] and [[parent-concept|Parent Concept]].
Linking to [[unrelated]].
`)

	got, links, ok := Extract(docPath, doc)
	require.True(t, ok)

	assert.Equal(t, "val-vol", got.Slug)
	assert.Equal(t, "hall", got.Type)
	assert.Equal(t, "jepa-fx", got.Wing)
	assert.Equal(t, "decisions", got.Hall)
	assert.Equal(t, "Val Vol Decision", got.Title)

	// For the aliased link [[parent-concept|Parent Concept]] the expected
	// destination is the part before the pipe.
	wantDst := []string{"other-decision", "parent-concept", "unrelated"}
	require.Len(t, links, len(wantDst))
	for i, link := range links {
		assert.Equal(t, wantDst[i], link.DstSlug)
		assert.Equal(t, "wikilink", link.EdgeType)
		assert.Equal(t, "val-vol", link.SrcSlug)
		assert.Equal(t, docPath, link.SrcDoc)
		assert.Greater(t, link.SrcLine, 0)
	}
}
// TestExtract_LegacyConceptDoc covers a document under wiki/concepts/. The
// entity is expected to be typed "concept" with empty Wing and Hall, and the
// single aliased wikilink should yield exactly one edge to the link target.
func TestExtract_LegacyConceptDoc(t *testing.T) {
	doc := []byte(`---
title: Hash Encoding
---
# Hash Encoding
Linked to [[financial-sentiment-analysis|FSA]].
`)

	got, links, ok := Extract("wiki/concepts/hash-encoding.md", doc)
	require.True(t, ok)

	assert.Equal(t, "hash-encoding", got.Slug)
	assert.Equal(t, "concept", got.Type)
	assert.Empty(t, got.Wing)
	assert.Empty(t, got.Hall)
	assert.Equal(t, "Hash Encoding", got.Title)

	require.Len(t, links, 1)
	only := links[0]
	assert.Equal(t, "financial-sentiment-analysis", only.DstSlug)
}
// TestExtract_KnowledgeDoc checks a knowledge/ document that has neither
// frontmatter nor wikilinks: the entity is typed "knowledge" and no edges
// are produced.
func TestExtract_KnowledgeDoc(t *testing.T) {
	const docPath = "knowledge/some-note.md"
	body := []byte("# No frontmatter, no links here.\n")

	got, links, ok := Extract(docPath, body)
	require.True(t, ok)

	assert.Equal(t, "some-note", got.Slug)
	assert.Equal(t, "knowledge", got.Type)
	assert.Empty(t, links)
}
// TestExtract_DedupesRepeatedLinkOnSameLine verifies that the same wikilink
// appearing twice on one line is reported as a single edge.
func TestExtract_DedupesRepeatedLinkOnSameLine(t *testing.T) {
	body := []byte("See [[foo]] and [[foo]] again on the same line.\n")

	_, links, ok := Extract("knowledge/dup.md", body)
	require.True(t, ok)

	require.Len(t, links, 1)
	only := links[0]
	assert.Equal(t, "foo", only.DstSlug)
}
// TestExtract_KeepsMultipleEdgesOnDifferentLines verifies that the same
// wikilink on two separate lines yields two edges with distinct source lines.
func TestExtract_KeepsMultipleEdgesOnDifferentLines(t *testing.T) {
	body := []byte("First mention [[foo]].\n\nSecond mention [[foo]].\n")

	_, links, ok := Extract("knowledge/multi.md", body)
	require.True(t, ok)

	require.Len(t, links, 2)
	first, second := links[0], links[1]
	assert.NotEqual(t, first.SrcLine, second.SrcLine)
}
// TestExtract_IgnoresSelfLinks verifies that a wikilink whose target equals
// the document's own slug ("self" for knowledge/self.md) produces no edge.
func TestExtract_IgnoresSelfLinks(t *testing.T) {
	const docPath = "knowledge/self.md"
	body := []byte("Self-reference [[self]] should be ignored.\n")

	_, links, ok := Extract(docPath, body)
	require.True(t, ok)

	assert.Empty(t, links)
}
// TestExtract_RejectsNonMarkdown verifies that Extract reports ok=false when
// handed a .txt path.
func TestExtract_RejectsNonMarkdown(t *testing.T) {
	const txtPath = "wiki/concepts/not-markdown.txt"

	_, _, accepted := Extract(txtPath, []byte("anything"))
	assert.False(t, accepted)
}
// TestExtract_LineNumbersAre1Indexed verifies that a link on the second line
// of the document is reported with SrcLine 2, i.e. counting starts at one.
func TestExtract_LineNumbersAre1Indexed(t *testing.T) {
	body := []byte("line 1\nline 2 [[bar]]\n")

	_, links, ok := Extract("knowledge/lines.md", body)
	require.True(t, ok)

	require.Len(t, links, 1)
	only := links[0]
	assert.Equal(t, 2, only.SrcLine)
}
// TestExtract_WikiRootFileIsKnowledgeNotHall is a regression test for files
// that sit directly under wiki/ with no subdirectory. Those used to come out
// as Type="hall" with Wing="<filename>.md", because the second path segment
// was the file itself. They are now expected to be Type="knowledge" with
// empty Wing and Hall unless frontmatter supplies them.
func TestExtract_WikiRootFileIsKnowledgeNotHall(t *testing.T) {
	body := []byte("# Index\n\n- [[foo]]\n")

	got, _, ok := Extract("wiki/index.md", body)
	require.True(t, ok)

	assert.Equal(t, "index", got.Slug)
	assert.Equal(t, "knowledge", got.Type)
	assert.Empty(t, got.Wing)
	assert.Empty(t, got.Hall)
}
// TestExtract_TierFromFrontmatter checks that the `tier:` and `topic:`
// frontmatter keys end up on the entity's Tier and Topic fields.
//
// Note: a knowledge/ path infers tier "knowledge" on its own (see
// TestExtract_TierInferredFromPath), so the Topic assertion is the one that
// shows the frontmatter was actually read here.
func TestExtract_TierFromFrontmatter(t *testing.T) {
	doc := []byte(`---
tier: knowledge
topic: postgres-roles
title: Least-privilege migration trap
---
# body
`)

	got, _, ok := Extract("knowledge/some-lesson.md", doc)
	require.True(t, ok)

	assert.Equal(t, "knowledge", got.Tier)
	assert.Equal(t, "postgres-roles", got.Topic)
}
// TestExtract_TierInferredFromPath checks the path-based tier fallback for
// documents that carry no frontmatter. Each path runs as its own subtest so
// that a failing case is reported by name and does not prevent the remaining
// cases from running.
func TestExtract_TierInferredFromPath(t *testing.T) {
	cases := []struct {
		path string
		want string
	}{
		{"knowledge/foo.md", "knowledge"},
		{"wiki/sources/x.md", "note"},
		{"wiki/concepts/x.md", "note"},
		{"wiki/x.md", "note"},
		{"inbox/clips/x.md", "inbox"},
		{"notes/x.md", "note"},
		{"raw/x.md", "inbox"},
		{"sessions/x.md", "inbox"},
	}
	for _, tc := range cases {
		t.Run(tc.path, func(t *testing.T) {
			ent, _, ok := Extract(tc.path, []byte("# x\n"))
			// require aborts only this subtest, not the whole table.
			require.True(t, ok)
			assert.Equal(t, tc.want, ent.Tier)
		})
	}
}
// TestExtract_FrontmatterTierBeatsPathInference pins the precedence rule: an
// explicit frontmatter tier overrides the tier the path would imply.
func TestExtract_FrontmatterTierBeatsPathInference(t *testing.T) {
	// The file lives under inbox/clips/, which by path alone infers "inbox",
	// but its frontmatter promotes it to "knowledge". This also covers a file
	// that was moved to a new location without its frontmatter being updated.
	const clipPath = "inbox/clips/x.md"
	doc := []byte("---\ntier: knowledge\n---\n# x\n")

	got, _, ok := Extract(clipPath, doc)
	require.True(t, ok)

	assert.Equal(t, "knowledge", got.Tier)
}
// TestExtract_WikiRootFileWithFrontmatterWingHall checks that a file directly
// under wiki/ keeps Type="knowledge" while still picking up Wing and Hall
// from its frontmatter.
func TestExtract_WikiRootFileWithFrontmatterWingHall(t *testing.T) {
	doc := []byte(`---
wing: homelab
hall: facts
---
# Some root note
`)

	got, _, ok := Extract("wiki/some-note.md", doc)
	require.True(t, ok)

	assert.Equal(t, "knowledge", got.Type)
	assert.Equal(t, "homelab", got.Wing)
	assert.Equal(t, "facts", got.Hall)
}