From 1b00cbc0aedf5b21aa6f9962b77e984a7da312f3 Mon Sep 17 00:00:00 2001 From: Mathias Date: Mon, 25 May 2026 18:47:25 +0200 Subject: [PATCH] =?UTF-8?q?fix(search,graph):=20M4b=20wiki/entities/=20?= =?UTF-8?q?=E2=86=92=20tier=3Dknowledge?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Initial M4 mapping put wiki/entities/* in tier=note. Post-M4 eval regressed qwen35-9b-fast from rank 2 → off top-5: knowledge entries that cite the entity in passing now outscore the entity page itself (1.5× weight vs 1.0×). Entity anchor pages are durable facts about concrete things — they map cleanly to the knowledge/facts/ slot in the post-M3 layout target. Promote them now so the path inference matches. Eval re-run after deploy is in infra#72. --- ingestion/internal/graph/extract.go | 17 ++++++++++++----- ingestion/internal/search/search.go | 5 +++++ 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/ingestion/internal/graph/extract.go b/ingestion/internal/graph/extract.go index f557cd2..9f0bda5 100644 --- a/ingestion/internal/graph/extract.go +++ b/ingestion/internal/graph/extract.go @@ -99,11 +99,18 @@ func inferTierFromPath(e *Entity, docPath string) { case "knowledge": e.Tier = "knowledge" case "wiki": - // Pre-M3 wiki layout: sources are synth output of raw inbox - // material (I tier); concepts + entities are reference notes - // (also I tier); top-level wiki/.md is unstructured - // reference too. None of these are reusable lessons (K). - e.Tier = "note" + // Pre-M3 wiki layout. Most subdirs are I-level: + // wiki/sources/ — synth summaries of raw inbox material + // wiki/concepts/ — definitions, not lessons + // One exception: wiki/entities/ holds anchor facts about + // concrete things (models, services, people) that the eval + // expects to surface when queried directly. Those map to K + // to match the post-M3 layout target (knowledge/facts/). + if len(parts) >= 2 && parts[1] == "entities" { + e.Tier = "knowledge" + } else { + e.Tier = "note" + } case "raw", "sessions", "clips": e.Tier = "inbox" } diff --git a/ingestion/internal/search/search.go b/ingestion/internal/search/search.go index 32d58a2..ec75d5b 100644 --- a/ingestion/internal/search/search.go +++ b/ingestion/internal/search/search.go @@ -343,6 +343,11 @@ func extractTier(content, relPath string) string { case "notes": return "note" case "wiki": + // wiki/entities/ anchor pages map to knowledge (see + // graph.inferTierFromPath for the rationale). + if len(parts) >= 2 && parts[1] == "entities" { + return "knowledge" + } return "note" case "knowledge": return "knowledge"