feat(graph,graphstore): M2 parse tier+topic from frontmatter, persist via Upsert (infra#72)
All checks were successful
CI / Lint / Test / Vet (push) Successful in 13s
CI / Mirror to GitHub (push) Successful in 4s

extract.go now reads `tier:` and `topic:` from YAML frontmatter. `tier`
additionally gets a path-based fallback when the frontmatter key is
absent (the pre-M3 state on every existing entry); `topic` has no
fallback and stays empty in that case:

  knowledge/* → tier=knowledge
  notes/*     → tier=note
  wiki/**     → tier=note   (sources + concepts + entities are I-level)
  inbox/**, raw/**, sessions/**, clips/** → tier=inbox

Frontmatter wins when present — covers the M3-migrated case where an
entry's path may not match the tier the author chose for it.

UpsertEntity persists both columns. M1's schema already has them.

Backfill on next pod start populates tier for the whole corpus
without any file moves; M3 will follow up with the actual layout
migration and explicit frontmatter writes.
This commit is contained in:
Mathias
2026-05-25 12:35:38 +02:00
parent ea9518e712
commit d5f112b600
3 changed files with 95 additions and 3 deletions

View File

@@ -35,6 +35,12 @@ type Entity struct {
Wing string // optional; from frontmatter or path
Hall string // optional; from frontmatter or path
Title string // optional; from frontmatter
// DIKW tier — infra#72. Empty until M3 migration writes `tier:`
// frontmatter to every entry. Path-inferred tier kicks in as a
// fallback so the column populates immediately on backfill even
// for entries that haven't had their frontmatter rewritten yet.
Tier string // "inbox" | "note" | "knowledge"
Topic string // kebab-slug; the thing the entry is about
}
// Edge represents a directed relationship between two slugs.
@@ -67,11 +73,42 @@ func Extract(docPath string, content []byte) (Entity, []Edge, bool) {
ent := Entity{DocPath: docPath, Slug: slug}
classifyByPath(&ent, docPath)
readFrontmatter(&ent, content)
inferTierFromPath(&ent, docPath)
edges := extractEdges(docPath, slug, content)
return ent, edges, true
}
// inferTierFromPath fills e.Tier from the leading segment of docPath, but
// only when Tier is still empty, so a tier that was already set (for
// example by frontmatter, which Extract reads first) is never overwritten.
//
// The new layout has dedicated subtrees per tier; pre-migration paths get
// their best-guess mapping so the column populates on backfill before the
// M3 file moves run:
//
//	inbox, raw, sessions, clips -> "inbox"
//	notes, wiki                 -> "note"
//	knowledge                   -> "knowledge"
//
// Any other leading segment — including an empty docPath or a path that
// starts with "/" — leaves Tier empty.
func inferTierFromPath(e *Entity, docPath string) {
	if e.Tier != "" {
		return
	}
	// Only the top-level directory decides the tier. strings.Cut returns
	// the whole string when there is no "/", so a single-segment path is
	// matched against the switch as-is.
	root, _, _ := strings.Cut(docPath, "/")
	switch root {
	case "inbox", "raw", "sessions", "clips":
		e.Tier = "inbox"
	case "notes", "wiki":
		// Pre-M3 wiki layout: sources are synth output of raw inbox
		// material (I tier); concepts + entities are reference notes
		// (also I tier); top-level wiki/<slug>.md is unstructured
		// reference too. None of these are reusable lessons (K).
		e.Tier = "note"
	case "knowledge":
		e.Tier = "knowledge"
	}
}
func slugFromPath(docPath string) string {
base := filepath.Base(docPath)
if !strings.HasSuffix(base, ".md") {
@@ -152,6 +189,14 @@ func readFrontmatter(e *Entity, content []byte) {
if e.Hall == "" {
e.Hall = v
}
case "tier":
if e.Tier == "" {
e.Tier = v
}
case "topic":
if e.Topic == "" {
e.Topic = v
}
}
}
}