diff --git a/ingestion/internal/graph/extract.go b/ingestion/internal/graph/extract.go index 0281665..f557cd2 100644 --- a/ingestion/internal/graph/extract.go +++ b/ingestion/internal/graph/extract.go @@ -35,6 +35,12 @@ type Entity struct { Wing string // optional; from frontmatter or path Hall string // optional; from frontmatter or path Title string // optional; from frontmatter + // DIKW tier — infra#72. Empty until M3 migration writes `tier:` + // frontmatter to every entry. Path-inferred tier kicks in as a + // fallback so the column populates immediately on backfill even + // for entries that haven't had their frontmatter rewritten yet. + Tier string // "inbox" | "note" | "knowledge" + Topic string // kebab-slug; the thing the entry is about } // Edge represents a directed relationship between two slugs. @@ -67,11 +73,42 @@ func Extract(docPath string, content []byte) (Entity, []Edge, bool) { ent := Entity{DocPath: docPath, Slug: slug} classifyByPath(&ent, docPath) readFrontmatter(&ent, content) + inferTierFromPath(&ent, docPath) edges := extractEdges(docPath, slug, content) return ent, edges, true } +// inferTierFromPath fills Tier when frontmatter didn't already set it. +// The new layout has dedicated subtrees per tier; pre-migration paths +// (knowledge/, wiki/, raw/, sessions/) get their best-guess mapping so +// the column populates on backfill before the M3 file moves run. +func inferTierFromPath(e *Entity, docPath string) { + if e.Tier != "" { + return + } + parts := strings.Split(docPath, "/") + if len(parts) == 0 { + return + } + switch parts[0] { + case "inbox": + e.Tier = "inbox" + case "notes": + e.Tier = "note" + case "knowledge": + e.Tier = "knowledge" + case "wiki": + // Pre-M3 wiki layout: sources are synth output of raw inbox + // material (I tier); concepts + entities are reference notes + // (also I tier); top-level wiki/*.md is unstructured + // reference too. None of these are reusable lessons (K). 
+ e.Tier = "note" + case "raw", "sessions", "clips": + e.Tier = "inbox" + } +} + func slugFromPath(docPath string) string { base := filepath.Base(docPath) if !strings.HasSuffix(base, ".md") { @@ -152,6 +189,14 @@ func readFrontmatter(e *Entity, content []byte) { if e.Hall == "" { e.Hall = v } + case "tier": + if e.Tier == "" { + e.Tier = v + } + case "topic": + if e.Topic == "" { + e.Topic = v + } } } } diff --git a/ingestion/internal/graph/extract_test.go b/ingestion/internal/graph/extract_test.go index 53faa0a..74619ee 100644 --- a/ingestion/internal/graph/extract_test.go +++ b/ingestion/internal/graph/extract_test.go @@ -119,6 +119,51 @@ func TestExtract_WikiRootFileIsKnowledgeNotHall(t *testing.T) { assert.Empty(t, ent.Hall) } +func TestExtract_TierFromFrontmatter(t *testing.T) { + content := []byte(`--- +tier: knowledge +topic: postgres-roles +title: Least-privilege migration trap +--- +# body +`) + ent, _, ok := Extract("knowledge/some-lesson.md", content) + require.True(t, ok) + assert.Equal(t, "knowledge", ent.Tier) + assert.Equal(t, "postgres-roles", ent.Topic) +} + +func TestExtract_TierInferredFromPath(t *testing.T) { + cases := []struct { + path string + want string + }{ + {"knowledge/foo.md", "knowledge"}, + {"wiki/sources/x.md", "note"}, + {"wiki/concepts/x.md", "note"}, + {"wiki/x.md", "note"}, + {"inbox/clips/x.md", "inbox"}, + {"notes/x.md", "note"}, + {"raw/x.md", "inbox"}, + {"sessions/x.md", "inbox"}, + } + for _, tc := range cases { + ent, _, ok := Extract(tc.path, []byte("# x\n")) + require.True(t, ok, tc.path) + assert.Equal(t, tc.want, ent.Tier, tc.path) + } +} + +func TestExtract_FrontmatterTierBeatsPathInference(t *testing.T) { + // A clip explicitly promoted via frontmatter wins over the path's + // inbox inference. Catches the case where a file has been moved + // to a new location but frontmatter hasn't been updated. 
+ content := []byte("---\ntier: knowledge\n---\n# x\n") + ent, _, ok := Extract("inbox/clips/x.md", content) + require.True(t, ok) + assert.Equal(t, "knowledge", ent.Tier) +} + func TestExtract_WikiRootFileWithFrontmatterWingHall(t *testing.T) { content := []byte(`--- wing: homelab diff --git a/ingestion/internal/graphstore/pg.go b/ingestion/internal/graphstore/pg.go index fa0942b..4ace151 100644 --- a/ingestion/internal/graphstore/pg.go +++ b/ingestion/internal/graphstore/pg.go @@ -118,16 +118,18 @@ func (s *PGStore) UpsertEntity(ctx context.Context, e graph.Entity) error { e.Type = "knowledge" } _, err := s.pool.Exec(ctx, ` - INSERT INTO brain_entities (slug, type, wing, hall, doc_path, title, updated_at) - VALUES ($1, $2, $3, $4, $5, $6, now()) + INSERT INTO brain_entities (slug, type, wing, hall, doc_path, title, tier, topic, updated_at) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, now()) ON CONFLICT (slug) DO UPDATE SET type = EXCLUDED.type, wing = EXCLUDED.wing, hall = EXCLUDED.hall, doc_path = EXCLUDED.doc_path, title = EXCLUDED.title, + tier = EXCLUDED.tier, + topic = EXCLUDED.topic, updated_at = now() - `, e.Slug, e.Type, e.Wing, e.Hall, e.DocPath, e.Title) + `, e.Slug, e.Type, e.Wing, e.Hall, e.DocPath, e.Title, e.Tier, e.Topic) if err != nil { return fmt.Errorf("upsert entity %q: %w", e.Slug, err) }