feat: source back-references on concept and entity pages
After each ingestion, every concept and entity page linked from the source page gains a ## Sources entry pointing back to that source. Pages already on disk (from prior ingestions) are loaded and updated, so ingesting additional sources accumulates references over time. Deduplication is handled by wiki.Merge's existing bullet-section logic.
This commit is contained in:
433
docs/superpowers/plans/2026-04-23-source-backrefs.md
Normal file
433
docs/superpowers/plans/2026-04-23-source-backrefs.md
Normal file
@@ -0,0 +1,433 @@
|
|||||||
|
# Source Back-References Implementation Plan
|
||||||
|
|
||||||
|
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
|
||||||
|
|
||||||
|
**Goal:** After the LLM produces wiki pages for an ingestion, automatically inject a `## Sources` back-reference on every concept and entity page that the source page links to.
|
||||||
|
|
||||||
|
**Architecture:** A new `injectSourceRefs` post-processing step is inserted between `Resolve` and `mergeAll` in `pipeline.Run`. It finds the source page in the proposed batch, extracts all `[[slug|...]]` wikilinks, then calls `wiki.Merge` with a minimal patch page to add the back-reference. `wiki.Merge` already treats `## Sources` as a bullet section with deduplication — no custom section parsing is needed. For concepts/entities that exist on disk but weren't proposed in the current batch (the common case on re-ingestion), the function loads them from disk and adds them to the pages list so they are updated.
|
||||||
|
|
||||||
|
**Tech Stack:** Go stdlib (`regexp`, `os`, `path/filepath`, `strings`), existing `wiki.Merge` and `wiki.Page` types.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## File Structure
|
||||||
|
|
||||||
|
**New files:**
|
||||||
|
- `ingestion/internal/pipeline/refs.go` — `injectSourceRefs`, `addSourceRef`, `extractWikilinks`, `findSourcePage`, `findInInventory`
|
||||||
|
- `ingestion/internal/pipeline/refs_test.go` — table-driven tests
|
||||||
|
|
||||||
|
**Modified files:**
|
||||||
|
- `ingestion/internal/pipeline/pipeline.go` — insert `injectSourceRefs` call between `Resolve` and `mergeAll`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Task 1: `refs.go` — source back-reference injection
|
||||||
|
|
||||||
|
**Files:**
|
||||||
|
- Create: `ingestion/internal/pipeline/refs_test.go`
|
||||||
|
- Create: `ingestion/internal/pipeline/refs.go`
|
||||||
|
|
||||||
|
- [ ] **Step 1: Write the failing tests**
|
||||||
|
|
||||||
|
```go
|
||||||
|
// ingestion/internal/pipeline/refs_test.go
|
||||||
|
package pipeline
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/wiki"
|
||||||
|
)
|
||||||
|
|
||||||
|
// makeInventory builds a minimal inventory for test use.
|
||||||
|
func makeInventory(concepts, entities []string) map[wiki.PageType][]wiki.Entry {
|
||||||
|
inv := map[wiki.PageType][]wiki.Entry{
|
||||||
|
wiki.PageTypeConcept: {},
|
||||||
|
wiki.PageTypeEntity: {},
|
||||||
|
wiki.PageTypeSource: {},
|
||||||
|
}
|
||||||
|
for _, slug := range concepts {
|
||||||
|
inv[wiki.PageTypeConcept] = append(inv[wiki.PageTypeConcept], wiki.Entry{Slug: slug, Title: slug})
|
||||||
|
}
|
||||||
|
for _, slug := range entities {
|
||||||
|
inv[wiki.PageTypeEntity] = append(inv[wiki.PageTypeEntity], wiki.Entry{Slug: slug, Title: slug})
|
||||||
|
}
|
||||||
|
return inv
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestInjectSourceRefs_NoSourcePage(t *testing.T) {
|
||||||
|
pages := []wiki.Page{
|
||||||
|
{Path: "wiki/concepts/foo.md", Content: "---\ntitle: Foo\n---\n\n## Definition\n\nFoo.\n"},
|
||||||
|
}
|
||||||
|
got := injectSourceRefs(pages, makeInventory(nil, nil), t.TempDir())
|
||||||
|
assert.Equal(t, pages, got)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestInjectSourceRefs_InjectsIntoProposedConcept(t *testing.T) {
|
||||||
|
pages := []wiki.Page{
|
||||||
|
{
|
||||||
|
Path: "wiki/sources/my-article.md",
|
||||||
|
Content: "---\ntitle: My Article\n---\n\n## Summary\n\nSee [[domain-driven-design|Domain Driven Design]].\n",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Path: "wiki/concepts/domain-driven-design.md",
|
||||||
|
Content: "---\ntitle: Domain Driven Design\n---\n\n## Definition\n\nA methodology.\n",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
got := injectSourceRefs(pages, makeInventory(nil, nil), t.TempDir())
|
||||||
|
|
||||||
|
require.Len(t, got, 2)
|
||||||
|
assert.Contains(t, got[1].Content, "## Sources")
|
||||||
|
assert.Contains(t, got[1].Content, "[[my-article|My Article]]")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestInjectSourceRefs_LoadsConceptFromDisk(t *testing.T) {
|
||||||
|
brainDir := t.TempDir()
|
||||||
|
conceptDir := filepath.Join(brainDir, "wiki", "concepts")
|
||||||
|
require.NoError(t, os.MkdirAll(conceptDir, 0o755))
|
||||||
|
require.NoError(t, os.WriteFile(
|
||||||
|
filepath.Join(conceptDir, "shape-up.md"),
|
||||||
|
[]byte("---\ntitle: Shape Up\n---\n\n## Definition\n\nA methodology.\n"),
|
||||||
|
0o644,
|
||||||
|
))
|
||||||
|
|
||||||
|
pages := []wiki.Page{
|
||||||
|
{
|
||||||
|
Path: "wiki/sources/my-article.md",
|
||||||
|
Content: "---\ntitle: My Article\n---\n\n## Summary\n\nSee [[shape-up|Shape Up]].\n",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
inv := makeInventory([]string{"shape-up"}, nil)
|
||||||
|
|
||||||
|
got := injectSourceRefs(pages, inv, brainDir)
|
||||||
|
|
||||||
|
// Should have loaded shape-up.md from disk and added it with source ref.
|
||||||
|
require.Len(t, got, 2)
|
||||||
|
var conceptPage wiki.Page
|
||||||
|
for _, p := range got {
|
||||||
|
if p.Path == "wiki/concepts/shape-up.md" {
|
||||||
|
conceptPage = p
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assert.Contains(t, conceptPage.Content, "## Sources")
|
||||||
|
assert.Contains(t, conceptPage.Content, "[[my-article|My Article]]")
|
||||||
|
// Original content preserved.
|
||||||
|
assert.Contains(t, conceptPage.Content, "## Definition")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestInjectSourceRefs_NoSelfReference(t *testing.T) {
|
||||||
|
pages := []wiki.Page{
|
||||||
|
{
|
||||||
|
Path: "wiki/sources/my-article.md",
|
||||||
|
Content: "---\ntitle: My Article\n---\n\n## Summary\n\nSelf-link [[my-article|My Article]].\n",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
got := injectSourceRefs(pages, makeInventory(nil, nil), t.TempDir())
|
||||||
|
|
||||||
|
// Only one page — source should not reference itself.
|
||||||
|
assert.Len(t, got, 1)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestInjectSourceRefs_DeduplicatesOnReingestion(t *testing.T) {
|
||||||
|
// Concept already has source ref from a prior ingestion.
|
||||||
|
pages := []wiki.Page{
|
||||||
|
{
|
||||||
|
Path: "wiki/sources/my-article.md",
|
||||||
|
Content: "---\ntitle: My Article\n---\n\n## Summary\n\nSee [[ddd|DDD]].\n",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Path: "wiki/concepts/ddd.md",
|
||||||
|
Content: "---\ntitle: DDD\n---\n\n## Definition\n\nA thing.\n\n## Sources\n\n- [[my-article|My Article]]\n",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
got := injectSourceRefs(pages, makeInventory(nil, nil), t.TempDir())
|
||||||
|
|
||||||
|
require.Len(t, got, 2)
|
||||||
|
// The source ref must appear exactly once.
|
||||||
|
count := 0
|
||||||
|
for _, line := range splitLines(got[1].Content) {
|
||||||
|
if line == "- [[my-article|My Article]]" {
|
||||||
|
count++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assert.Equal(t, 1, count, "source ref should appear exactly once")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestInjectSourceRefs_InjectsIntoEntity(t *testing.T) {
|
||||||
|
pages := []wiki.Page{
|
||||||
|
{
|
||||||
|
Path: "wiki/sources/book.md",
|
||||||
|
Content: "---\ntitle: Book\n---\n\n## Summary\n\nBy [[ryan-singer|Ryan Singer]].\n",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Path: "wiki/entities/ryan-singer.md",
|
||||||
|
Content: "---\ntitle: Ryan Singer\n---\n\n## Description\n\nA designer.\n",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
got := injectSourceRefs(pages, makeInventory(nil, nil), t.TempDir())
|
||||||
|
|
||||||
|
require.Len(t, got, 2)
|
||||||
|
var entity wiki.Page
|
||||||
|
for _, p := range got {
|
||||||
|
if p.Path == "wiki/entities/ryan-singer.md" {
|
||||||
|
entity = p
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assert.Contains(t, entity.Content, "[[book|Book]]")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestExtractWikilinks(t *testing.T) {
|
||||||
|
content := "See [[foo|Foo]] and [[bar|Bar]] and [[foo|Foo again]]."
|
||||||
|
got := extractWikilinks(content)
|
||||||
|
assert.True(t, got["foo"])
|
||||||
|
assert.True(t, got["bar"])
|
||||||
|
assert.Len(t, got, 2, "duplicate slugs should be deduplicated")
|
||||||
|
}
|
||||||
|
|
||||||
|
// splitLines is a test helper.
|
||||||
|
func splitLines(s string) []string {
|
||||||
|
var out []string
|
||||||
|
for _, l := range splitNewlines(s) {
|
||||||
|
if l != "" {
|
||||||
|
out = append(out, l)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
func splitNewlines(s string) []string {
|
||||||
|
var lines []string
|
||||||
|
start := 0
|
||||||
|
for i, c := range s {
|
||||||
|
if c == '\n' {
|
||||||
|
lines = append(lines, s[start:i])
|
||||||
|
start = i + 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
lines = append(lines, s[start:])
|
||||||
|
return lines
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] **Step 2: Run to verify they fail**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd /Users/mathias/Documents/local-dev/AI/hyperguild/.worktrees/feat-source-backrefs/ingestion && go test ./internal/pipeline/... -run "TestInjectSourceRefs|TestExtractWikilinks" -v
|
||||||
|
```
|
||||||
|
Expected: compile error — `injectSourceRefs` and `extractWikilinks` not defined.
|
||||||
|
|
||||||
|
- [ ] **Step 3: Implement refs.go**
|
||||||
|
|
||||||
|
```go
|
||||||
|
// ingestion/internal/pipeline/refs.go
|
||||||
|
package pipeline
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"regexp"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/wiki"
|
||||||
|
)
|
||||||
|
|
||||||
|
var wikilinkRE = regexp.MustCompile(`\[\[([^|\]]+)\|`)
|
||||||
|
|
||||||
|
// injectSourceRefs finds the source page in the proposed batch, extracts its wikilinks,
|
||||||
|
// and injects a back-reference into every linked concept or entity page.
|
||||||
|
// Pages that exist on disk but are not in the current batch are loaded and appended
|
||||||
|
// so they will be updated on write.
|
||||||
|
func injectSourceRefs(pages []wiki.Page, inventory map[wiki.PageType][]wiki.Entry, brainDir string) []wiki.Page {
|
||||||
|
sourceSlug, sourceTitle, found := findSourcePage(pages)
|
||||||
|
if !found {
|
||||||
|
return pages
|
||||||
|
}
|
||||||
|
|
||||||
|
// Locate source page content for wikilink extraction.
|
||||||
|
var sourceContent string
|
||||||
|
for _, p := range pages {
|
||||||
|
if strings.HasPrefix(p.Path, "wiki/sources/") &&
|
||||||
|
strings.TrimSuffix(filepath.Base(p.Path), ".md") == sourceSlug {
|
||||||
|
sourceContent = p.Content
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
linkedSlugs := extractWikilinks(sourceContent)
|
||||||
|
sourceRef := "- [[" + sourceSlug + "|" + sourceTitle + "]]"
|
||||||
|
|
||||||
|
// Build slug → index map for proposed pages (excluding wiki/sources/).
|
||||||
|
bySlug := make(map[string]int, len(pages))
|
||||||
|
for i, p := range pages {
|
||||||
|
if !strings.HasPrefix(p.Path, "wiki/sources/") {
|
||||||
|
bySlug[strings.TrimSuffix(filepath.Base(p.Path), ".md")] = i
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for slug := range linkedSlugs {
|
||||||
|
if slug == sourceSlug {
|
||||||
|
continue // no self-reference
|
||||||
|
}
|
||||||
|
|
||||||
|
if idx, ok := bySlug[slug]; ok {
|
||||||
|
// Concept/entity is in the proposed batch — inject inline.
|
||||||
|
pages[idx] = addSourceRef(pages[idx], sourceRef)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Not in proposed batch — look for it in the inventory (exists on disk).
|
||||||
|
pt, ok := findInInventory(slug, inventory)
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
diskPath := filepath.Join(brainDir, "wiki", string(pt), slug+".md")
|
||||||
|
b, err := os.ReadFile(diskPath)
|
||||||
|
if err != nil {
|
||||||
|
continue // page not found on disk; skip
|
||||||
|
}
|
||||||
|
page := wiki.Page{
|
||||||
|
Path: "wiki/" + string(pt) + "/" + slug + ".md",
|
||||||
|
Content: string(b),
|
||||||
|
}
|
||||||
|
pages = append(pages, addSourceRef(page, sourceRef))
|
||||||
|
}
|
||||||
|
|
||||||
|
return pages
|
||||||
|
}
|
||||||
|
|
||||||
|
// addSourceRef injects sourceRef into the ## Sources bullet section of page.
|
||||||
|
// Uses wiki.Merge so that existing Sources entries are deduplicated and all
|
||||||
|
// other sections are preserved unchanged.
|
||||||
|
func addSourceRef(page wiki.Page, sourceRef string) wiki.Page {
|
||||||
|
patch := wiki.Page{
|
||||||
|
Path: page.Path,
|
||||||
|
Content: "\n## Sources\n\n" + sourceRef + "\n",
|
||||||
|
}
|
||||||
|
return wiki.Merge(page, patch)
|
||||||
|
}
|
||||||
|
|
||||||
|
// extractWikilinks returns the set of slugs referenced as [[slug|...]] in content.
|
||||||
|
func extractWikilinks(content string) map[string]bool {
|
||||||
|
slugs := make(map[string]bool)
|
||||||
|
for _, m := range wikilinkRE.FindAllStringSubmatch(content, -1) {
|
||||||
|
slugs[m[1]] = true
|
||||||
|
}
|
||||||
|
return slugs
|
||||||
|
}
|
||||||
|
|
||||||
|
// findSourcePage returns the slug and title of the first wiki/sources/ page in pages.
|
||||||
|
func findSourcePage(pages []wiki.Page) (slug, title string, found bool) {
|
||||||
|
for _, p := range pages {
|
||||||
|
if strings.HasPrefix(p.Path, "wiki/sources/") {
|
||||||
|
slug = strings.TrimSuffix(filepath.Base(p.Path), ".md")
|
||||||
|
title = extractTitle(p.Content)
|
||||||
|
if title == "" {
|
||||||
|
title = slug
|
||||||
|
}
|
||||||
|
return slug, title, true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return "", "", false
|
||||||
|
}
|
||||||
|
|
||||||
|
// findInInventory returns the PageType for a slug if it appears in the inventory.
|
||||||
|
func findInInventory(slug string, inventory map[wiki.PageType][]wiki.Entry) (wiki.PageType, bool) {
|
||||||
|
for pt, entries := range inventory {
|
||||||
|
for _, e := range entries {
|
||||||
|
if e.Slug == slug {
|
||||||
|
return pt, true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return "", false
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] **Step 4: Run all pipeline tests**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd /Users/mathias/Documents/local-dev/AI/hyperguild/.worktrees/feat-source-backrefs/ingestion && go test ./internal/pipeline/... -v
|
||||||
|
```
|
||||||
|
Expected: all existing tests PASS + 7 new refs tests PASS.
|
||||||
|
|
||||||
|
- [ ] **Step 5: Commit**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd /Users/mathias/Documents/local-dev/AI/hyperguild/.worktrees/feat-source-backrefs && git add ingestion/internal/pipeline/refs.go ingestion/internal/pipeline/refs_test.go && git commit -m "feat(pipeline): inject source back-references into concept and entity pages"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Task 2: Wire injectSourceRefs into pipeline.Run
|
||||||
|
|
||||||
|
**Files:**
|
||||||
|
- Modify: `ingestion/internal/pipeline/pipeline.go`
|
||||||
|
|
||||||
|
- [ ] **Step 1: Insert the call**
|
||||||
|
|
||||||
|
In `pipeline.go`, locate:
|
||||||
|
|
||||||
|
```go
|
||||||
|
resolved := Resolve(allPages, inventory)
|
||||||
|
merged := mergeAll(resolved)
|
||||||
|
```
|
||||||
|
|
||||||
|
Replace with:
|
||||||
|
|
||||||
|
```go
|
||||||
|
resolved := Resolve(allPages, inventory)
|
||||||
|
withRefs := injectSourceRefs(resolved, inventory, brainDir)
|
||||||
|
merged := mergeAll(withRefs)
|
||||||
|
```
|
||||||
|
|
||||||
|
No import changes needed — same package.
|
||||||
|
|
||||||
|
- [ ] **Step 2: Run all pipeline tests**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd /Users/mathias/Documents/local-dev/AI/hyperguild/.worktrees/feat-source-backrefs/ingestion && go test ./internal/pipeline/... -v
|
||||||
|
```
|
||||||
|
Expected: all tests PASS. The existing `TestRun_WritesPages` and `TestRun_DryRunDoesNotWrite` use LLM mocks that return source pages with no wikilinks to concepts — `injectSourceRefs` is a no-op for them.
|
||||||
|
|
||||||
|
- [ ] **Step 3: Run full test suite + lint**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd /Users/mathias/Documents/local-dev/AI/hyperguild/.worktrees/feat-source-backrefs/ingestion && go test ./... && golangci-lint run ./...
|
||||||
|
```
|
||||||
|
Expected: all packages PASS, 0 lint issues.
|
||||||
|
|
||||||
|
- [ ] **Step 4: Commit**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd /Users/mathias/Documents/local-dev/AI/hyperguild/.worktrees/feat-source-backrefs && git add ingestion/internal/pipeline/pipeline.go && git commit -m "feat(pipeline): wire source back-reference injection into Run"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Self-Review
|
||||||
|
|
||||||
|
**Spec coverage:**
|
||||||
|
|
||||||
|
| Requirement | Task |
|
||||||
|
|---|---|
|
||||||
|
| Concepts get `## Sources` back-link to ingested source | Task 1 |
|
||||||
|
| Entities get `## Sources` back-link | Task 1 (TestInjectSourceRefs_InjectsIntoEntity) |
|
||||||
|
| Existing pages on disk get updated with new source | Task 1 (TestInjectSourceRefs_LoadsConceptFromDisk) |
|
||||||
|
| Re-ingestion of same source does not duplicate the ref | Task 1 (TestInjectSourceRefs_DeduplicatesOnReingestion) |
|
||||||
|
| Source page does not reference itself | Task 1 (TestInjectSourceRefs_NoSelfReference) |
|
||||||
|
| No-op when batch has no source page | Task 1 (TestInjectSourceRefs_NoSourcePage) |
|
||||||
|
| Wired into Run between Resolve and mergeAll | Task 2 |
|
||||||
|
| Full test suite and lint pass | Task 2 Step 3 |
|
||||||
|
|
||||||
|
**Placeholder scan:** None.
|
||||||
|
|
||||||
|
**Type consistency:** `injectSourceRefs([]wiki.Page, map[wiki.PageType][]wiki.Entry, string) []wiki.Page` — used identically in refs.go (definition) and pipeline.go (call site).
|
||||||
@@ -58,7 +58,8 @@ func Run(ctx context.Context, cfg Config, brainDir, content, source string, dryR
|
|||||||
}
|
}
|
||||||
|
|
||||||
resolved := Resolve(allPages, inventory)
|
resolved := Resolve(allPages, inventory)
|
||||||
merged := mergeAll(resolved)
|
withRefs := injectSourceRefs(resolved, inventory, brainDir)
|
||||||
|
merged := mergeAll(withRefs)
|
||||||
|
|
||||||
date := time.Now().UTC().Format("2006-01-02")
|
date := time.Now().UTC().Format("2006-01-02")
|
||||||
var written []string
|
var written []string
|
||||||
|
|||||||
115
ingestion/internal/pipeline/refs.go
Normal file
115
ingestion/internal/pipeline/refs.go
Normal file
@@ -0,0 +1,115 @@
|
|||||||
|
// ingestion/internal/pipeline/refs.go
|
||||||
|
package pipeline
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"regexp"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/wiki"
|
||||||
|
)
|
||||||
|
|
||||||
|
// wikilinkRE matches the opening of a piped wikilink, "[[slug|", and captures
// the slug (one or more characters other than '|' and ']') as group 1.
// Unpiped links of the form "[[slug]]" are not matched.
var wikilinkRE = regexp.MustCompile(`\[\[([^|\]]+)\|`)
|
||||||
|
|
||||||
|
// injectSourceRefs finds the source page in the proposed batch, extracts its
|
||||||
|
// wikilinks, and injects a back-reference into every linked concept or entity page.
|
||||||
|
// Pages that exist on disk but are not in the current batch are loaded and
|
||||||
|
// appended so they will be updated on write.
|
||||||
|
func injectSourceRefs(pages []wiki.Page, inventory map[wiki.PageType][]wiki.Entry, brainDir string) []wiki.Page {
|
||||||
|
sourceSlug, sourceTitle, found := findSourcePage(pages)
|
||||||
|
if !found {
|
||||||
|
return pages
|
||||||
|
}
|
||||||
|
|
||||||
|
var sourceContent string
|
||||||
|
for _, p := range pages {
|
||||||
|
if strings.HasPrefix(p.Path, "wiki/sources/") &&
|
||||||
|
strings.TrimSuffix(filepath.Base(p.Path), ".md") == sourceSlug {
|
||||||
|
sourceContent = p.Content
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
linkedSlugs := extractWikilinks(sourceContent)
|
||||||
|
sourceRef := "- [[" + sourceSlug + "|" + sourceTitle + "]]"
|
||||||
|
|
||||||
|
bySlug := make(map[string]int, len(pages))
|
||||||
|
for i, p := range pages {
|
||||||
|
if !strings.HasPrefix(p.Path, "wiki/sources/") {
|
||||||
|
bySlug[strings.TrimSuffix(filepath.Base(p.Path), ".md")] = i
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for slug := range linkedSlugs {
|
||||||
|
if slug == sourceSlug {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if idx, ok := bySlug[slug]; ok {
|
||||||
|
pages[idx] = addSourceRef(pages[idx], sourceRef)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
pt, ok := findInInventory(slug, inventory)
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
diskPath := filepath.Join(brainDir, "wiki", string(pt), slug+".md")
|
||||||
|
b, err := os.ReadFile(diskPath)
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
page := wiki.Page{
|
||||||
|
Path: "wiki/" + string(pt) + "/" + slug + ".md",
|
||||||
|
Content: string(b),
|
||||||
|
}
|
||||||
|
pages = append(pages, addSourceRef(page, sourceRef))
|
||||||
|
}
|
||||||
|
|
||||||
|
return pages
|
||||||
|
}
|
||||||
|
|
||||||
|
// addSourceRef injects sourceRef into the ## Sources bullet section of page
|
||||||
|
// using wiki.Merge, which deduplicates bullets automatically.
|
||||||
|
func addSourceRef(page wiki.Page, sourceRef string) wiki.Page {
|
||||||
|
patch := wiki.Page{
|
||||||
|
Path: page.Path,
|
||||||
|
Content: "\n## Sources\n\n" + sourceRef + "\n",
|
||||||
|
}
|
||||||
|
return wiki.Merge(page, patch)
|
||||||
|
}
|
||||||
|
|
||||||
|
// extractWikilinks returns the set of slugs referenced as [[slug|...]] in content.
|
||||||
|
func extractWikilinks(content string) map[string]bool {
|
||||||
|
slugs := make(map[string]bool)
|
||||||
|
for _, m := range wikilinkRE.FindAllStringSubmatch(content, -1) {
|
||||||
|
slugs[m[1]] = true
|
||||||
|
}
|
||||||
|
return slugs
|
||||||
|
}
|
||||||
|
|
||||||
|
// findSourcePage returns the slug and title of the first wiki/sources/ page in pages.
|
||||||
|
func findSourcePage(pages []wiki.Page) (slug, title string, found bool) {
|
||||||
|
for _, p := range pages {
|
||||||
|
if strings.HasPrefix(p.Path, "wiki/sources/") {
|
||||||
|
slug = strings.TrimSuffix(filepath.Base(p.Path), ".md")
|
||||||
|
title = extractTitle(p.Content)
|
||||||
|
if title == "" {
|
||||||
|
title = slug
|
||||||
|
}
|
||||||
|
return slug, title, true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return "", "", false
|
||||||
|
}
|
||||||
|
|
||||||
|
// findInInventory returns the PageType for a slug if it appears in the inventory.
|
||||||
|
func findInInventory(slug string, inventory map[wiki.PageType][]wiki.Entry) (wiki.PageType, bool) {
|
||||||
|
for pt, entries := range inventory {
|
||||||
|
for _, e := range entries {
|
||||||
|
if e.Slug == slug {
|
||||||
|
return pt, true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return "", false
|
||||||
|
}
|
||||||
172
ingestion/internal/pipeline/refs_test.go
Normal file
172
ingestion/internal/pipeline/refs_test.go
Normal file
@@ -0,0 +1,172 @@
|
|||||||
|
// ingestion/internal/pipeline/refs_test.go
|
||||||
|
package pipeline
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/wiki"
|
||||||
|
)
|
||||||
|
|
||||||
|
func makeInventory(concepts, entities []string) map[wiki.PageType][]wiki.Entry {
|
||||||
|
inv := map[wiki.PageType][]wiki.Entry{
|
||||||
|
wiki.PageTypeConcept: {},
|
||||||
|
wiki.PageTypeEntity: {},
|
||||||
|
wiki.PageTypeSource: {},
|
||||||
|
}
|
||||||
|
for _, slug := range concepts {
|
||||||
|
inv[wiki.PageTypeConcept] = append(inv[wiki.PageTypeConcept], wiki.Entry{Slug: slug, Title: slug})
|
||||||
|
}
|
||||||
|
for _, slug := range entities {
|
||||||
|
inv[wiki.PageTypeEntity] = append(inv[wiki.PageTypeEntity], wiki.Entry{Slug: slug, Title: slug})
|
||||||
|
}
|
||||||
|
return inv
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestInjectSourceRefs_NoSourcePage(t *testing.T) {
|
||||||
|
pages := []wiki.Page{
|
||||||
|
{Path: "wiki/concepts/foo.md", Content: "---\ntitle: Foo\n---\n\n## Definition\n\nFoo.\n"},
|
||||||
|
}
|
||||||
|
got := injectSourceRefs(pages, makeInventory(nil, nil), t.TempDir())
|
||||||
|
assert.Equal(t, pages, got)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestInjectSourceRefs_InjectsIntoProposedConcept(t *testing.T) {
|
||||||
|
pages := []wiki.Page{
|
||||||
|
{
|
||||||
|
Path: "wiki/sources/my-article.md",
|
||||||
|
Content: "---\ntitle: My Article\n---\n\n## Summary\n\nSee [[domain-driven-design|Domain Driven Design]].\n",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Path: "wiki/concepts/domain-driven-design.md",
|
||||||
|
Content: "---\ntitle: Domain Driven Design\n---\n\n## Definition\n\nA methodology.\n",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
got := injectSourceRefs(pages, makeInventory(nil, nil), t.TempDir())
|
||||||
|
|
||||||
|
require.Len(t, got, 2)
|
||||||
|
assert.Contains(t, got[1].Content, "## Sources")
|
||||||
|
assert.Contains(t, got[1].Content, "[[my-article|My Article]]")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestInjectSourceRefs_LoadsConceptFromDisk(t *testing.T) {
|
||||||
|
brainDir := t.TempDir()
|
||||||
|
conceptDir := filepath.Join(brainDir, "wiki", "concepts")
|
||||||
|
require.NoError(t, os.MkdirAll(conceptDir, 0o755))
|
||||||
|
require.NoError(t, os.WriteFile(
|
||||||
|
filepath.Join(conceptDir, "shape-up.md"),
|
||||||
|
[]byte("---\ntitle: Shape Up\n---\n\n## Definition\n\nA methodology.\n"),
|
||||||
|
0o644,
|
||||||
|
))
|
||||||
|
|
||||||
|
pages := []wiki.Page{
|
||||||
|
{
|
||||||
|
Path: "wiki/sources/my-article.md",
|
||||||
|
Content: "---\ntitle: My Article\n---\n\n## Summary\n\nSee [[shape-up|Shape Up]].\n",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
inv := makeInventory([]string{"shape-up"}, nil)
|
||||||
|
|
||||||
|
got := injectSourceRefs(pages, inv, brainDir)
|
||||||
|
|
||||||
|
require.Len(t, got, 2)
|
||||||
|
var conceptPage wiki.Page
|
||||||
|
for _, p := range got {
|
||||||
|
if p.Path == "wiki/concepts/shape-up.md" {
|
||||||
|
conceptPage = p
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assert.Contains(t, conceptPage.Content, "## Sources")
|
||||||
|
assert.Contains(t, conceptPage.Content, "[[my-article|My Article]]")
|
||||||
|
assert.Contains(t, conceptPage.Content, "## Definition")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestInjectSourceRefs_NoSelfReference(t *testing.T) {
|
||||||
|
pages := []wiki.Page{
|
||||||
|
{
|
||||||
|
Path: "wiki/sources/my-article.md",
|
||||||
|
Content: "---\ntitle: My Article\n---\n\n## Summary\n\nSelf-link [[my-article|My Article]].\n",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
got := injectSourceRefs(pages, makeInventory(nil, nil), t.TempDir())
|
||||||
|
assert.Len(t, got, 1)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestInjectSourceRefs_DeduplicatesOnReingestion(t *testing.T) {
|
||||||
|
pages := []wiki.Page{
|
||||||
|
{
|
||||||
|
Path: "wiki/sources/my-article.md",
|
||||||
|
Content: "---\ntitle: My Article\n---\n\n## Summary\n\nSee [[ddd|DDD]].\n",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Path: "wiki/concepts/ddd.md",
|
||||||
|
Content: "---\ntitle: DDD\n---\n\n## Definition\n\nA thing.\n\n## Sources\n\n- [[my-article|My Article]]\n",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
got := injectSourceRefs(pages, makeInventory(nil, nil), t.TempDir())
|
||||||
|
|
||||||
|
require.Len(t, got, 2)
|
||||||
|
count := 0
|
||||||
|
for _, line := range splitLines(got[1].Content) {
|
||||||
|
if line == "- [[my-article|My Article]]" {
|
||||||
|
count++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assert.Equal(t, 1, count, "source ref should appear exactly once")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestInjectSourceRefs_InjectsIntoEntity(t *testing.T) {
|
||||||
|
pages := []wiki.Page{
|
||||||
|
{
|
||||||
|
Path: "wiki/sources/book.md",
|
||||||
|
Content: "---\ntitle: Book\n---\n\n## Summary\n\nBy [[ryan-singer|Ryan Singer]].\n",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Path: "wiki/entities/ryan-singer.md",
|
||||||
|
Content: "---\ntitle: Ryan Singer\n---\n\n## Description\n\nA designer.\n",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
got := injectSourceRefs(pages, makeInventory(nil, nil), t.TempDir())
|
||||||
|
|
||||||
|
require.Len(t, got, 2)
|
||||||
|
var entity wiki.Page
|
||||||
|
for _, p := range got {
|
||||||
|
if p.Path == "wiki/entities/ryan-singer.md" {
|
||||||
|
entity = p
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assert.Contains(t, entity.Content, "[[book|Book]]")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestExtractWikilinks(t *testing.T) {
|
||||||
|
content := "See [[foo|Foo]] and [[bar|Bar]] and [[foo|Foo again]]."
|
||||||
|
got := extractWikilinks(content)
|
||||||
|
assert.True(t, got["foo"])
|
||||||
|
assert.True(t, got["bar"])
|
||||||
|
assert.Len(t, got, 2, "duplicate slugs should be deduplicated")
|
||||||
|
}
|
||||||
|
|
||||||
|
// splitLines is a test helper that cuts s at every '\n' and returns the
// non-empty pieces in order. Blank lines are dropped, and the result is nil
// when s contains no non-empty line.
func splitLines(s string) []string {
	var lines []string
	keep := func(segment string) {
		if segment != "" {
			lines = append(lines, segment)
		}
	}

	begin := 0
	for pos, r := range s {
		if r != '\n' {
			continue
		}
		keep(s[begin:pos])
		begin = pos + 1
	}
	// Whatever follows the final newline (or all of s if there was none).
	keep(s[begin:])

	return lines
}
|
||||||
Reference in New Issue
Block a user