feat: PDF extraction and fuzzy entity resolution
- New extract package: Text() dispatcher for .md/.txt passthrough and PDF extraction via pdftotext subprocess
- wiki.Entry gains Aliases []string, loaded from YAML frontmatter
- Fuzzy entity resolution in pipeline: normalizes titles (lowercase, strip articles, collapse hyphens) and matches proposed pages against existing inventory titles and aliases to prevent slug proliferation
- Watcher and API handler now use extract.Text() instead of os.ReadFile
- Dockerfile: apk add poppler-utils in Alpine runtime stage
This commit is contained in:
858
docs/superpowers/plans/2026-04-22-brain-ingestion-quality.md
Normal file
858
docs/superpowers/plans/2026-04-22-brain-ingestion-quality.md
Normal file
@@ -0,0 +1,858 @@
|
|||||||
|
# Brain Ingestion Quality: PDF Extraction + Entity Resolution
|
||||||
|
|
||||||
|
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
|
||||||
|
|
||||||
|
**Goal:** Fix PDF ingestion (currently passes raw bytes to LLM) and add fuzzy entity resolution (prevents slug proliferation at scale).
|
||||||
|
|
||||||
|
**Architecture:** Two independent improvements wired into the existing pipeline. A new `extract` package handles text extraction by file type (pdftotext subprocess, passthrough for .md/.txt) and is called from `watcher.go` and `api/handler.go` in place of `os.ReadFile`. A new `resolve.go` in the `pipeline` package normalizes proposed entity/concept titles against the loaded inventory to reuse existing slugs instead of creating duplicates, and is called from `pipeline.Run`. Neither change adds a Go dependency; the only new runtime requirement is `poppler-utils` in the Docker image.
|
||||||
|
|
||||||
|
**Tech Stack:** Go stdlib (`os/exec`, `bufio`, `strings`), testify, poppler-utils (`pdftotext`)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## File Structure
|
||||||
|
|
||||||
|
**New files:**
|
||||||
|
- `ingestion/internal/extract/extract.go` — `Text(path string) (string, error)` dispatcher
|
||||||
|
- `ingestion/internal/extract/pdf.go` — `pdftotext` subprocess extraction
|
||||||
|
- `ingestion/internal/extract/extract_test.go` — tests for every `Text` path (.md, .txt, unsupported extension, PDF)
|
||||||
|
- `ingestion/internal/pipeline/resolve.go` — `Resolve(proposed []wiki.Page, inventory map[wiki.PageType][]wiki.Entry) []wiki.Page`
|
||||||
|
- `ingestion/internal/pipeline/resolve_test.go` — tests for `Resolve` (no match, title match, alias match, normalization, page-type isolation, empty inventory)
|
||||||
|
|
||||||
|
**Modified files:**
|
||||||
|
- `ingestion/internal/wiki/types.go` — add `Aliases []string` to `Entry`
|
||||||
|
- `ingestion/internal/wiki/inventory.go` — `readFrontmatter` reads both title and aliases
|
||||||
|
- `ingestion/internal/wiki/inventory_test.go` — add alias coverage
|
||||||
|
- `ingestion/internal/pipeline/pipeline.go` — call `Resolve` after `ParsePages`
|
||||||
|
- `ingestion/internal/watcher/watcher.go` — call `extract.Text` instead of `os.ReadFile`
|
||||||
|
- `ingestion/internal/api/handler.go` — call `extract.Text` for path-based ingestion
|
||||||
|
- `ingestion/Dockerfile` — `apk add poppler-utils`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Task 1: `extract` package — Text() dispatcher with .md/.txt passthrough
|
||||||
|
|
||||||
|
**Files:**
|
||||||
|
- Create: `ingestion/internal/extract/extract.go`
|
||||||
|
- Create: `ingestion/internal/extract/extract_test.go`
|
||||||
|
|
||||||
|
- [ ] **Step 1: Write the failing test**
|
||||||
|
|
||||||
|
```go
|
||||||
|
// ingestion/internal/extract/extract_test.go
|
||||||
|
package extract
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestText_Markdown(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
path := filepath.Join(dir, "note.md")
|
||||||
|
require.NoError(t, os.WriteFile(path, []byte("# Hello\n\nWorld."), 0o644))
|
||||||
|
|
||||||
|
got, err := Text(path)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, "# Hello\n\nWorld.", got)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestText_Txt(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
path := filepath.Join(dir, "note.txt")
|
||||||
|
require.NoError(t, os.WriteFile(path, []byte("plain text"), 0o644))
|
||||||
|
|
||||||
|
got, err := Text(path)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, "plain text", got)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestText_UnsupportedExtension(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
path := filepath.Join(dir, "data.csv")
|
||||||
|
require.NoError(t, os.WriteFile(path, []byte("a,b,c"), 0o644))
|
||||||
|
|
||||||
|
_, err := Text(path)
|
||||||
|
assert.ErrorContains(t, err, "unsupported")
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] **Step 2: Run to verify it fails**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ingestion && go test ./internal/extract/... -v
|
||||||
|
```
|
||||||
|
Expected: compile error — package does not exist yet.
|
||||||
|
|
||||||
|
- [ ] **Step 3: Implement extract.go**
|
||||||
|
|
||||||
|
```go
|
||||||
|
// ingestion/internal/extract/extract.go
|
||||||
|
package extract
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Text reads the file at path and returns its plain-text content.
|
||||||
|
// Supported extensions: .md, .txt (passthrough), .pdf (via pdftotext).
|
||||||
|
func Text(path string) (string, error) {
|
||||||
|
ext := strings.ToLower(fileExt(path))
|
||||||
|
switch ext {
|
||||||
|
case ".md", ".txt":
|
||||||
|
b, err := os.ReadFile(path)
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("read %s: %w", path, err)
|
||||||
|
}
|
||||||
|
return string(b), nil
|
||||||
|
case ".pdf":
|
||||||
|
return extractPDF(path)
|
||||||
|
default:
|
||||||
|
return "", fmt.Errorf("unsupported file extension: %s", ext)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// fileExt returns the extension of path, including the leading dot and
// lowercased. The extension is the suffix starting at the last dot of the
// final path element; "" is returned when that element contains no dot.
// Both '/' and '\\' are treated as path separators.
func fileExt(path string) string {
	for i := len(path) - 1; i >= 0; i-- {
		switch path[i] {
		case '.':
			// Lowercase here so the result matches the documented contract.
			return strings.ToLower(path[i:])
		case '/', '\\':
			// Reached the start of the final element without seeing a dot.
			return ""
		}
	}
	return ""
}
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] **Step 4: Add pdf.go stub so it compiles**
|
||||||
|
|
||||||
|
```go
|
||||||
|
// ingestion/internal/extract/pdf.go
|
||||||
|
package extract
|
||||||
|
|
||||||
|
import "fmt"
|
||||||
|
|
||||||
|
// extractPDF is a temporary stub that lets the package compile before the
// real pdftotext-based extraction exists. It ignores its argument and always
// returns an empty string with a "not implemented" error.
func extractPDF(path string) (string, error) {
	err := fmt.Errorf("PDF extraction not implemented")
	return "", err
}
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] **Step 5: Run tests to verify they pass**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ingestion && go test ./internal/extract/... -v
|
||||||
|
```
|
||||||
|
Expected: PASS — 3 tests passing.
|
||||||
|
|
||||||
|
- [ ] **Step 6: Commit**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ingestion && git add internal/extract/
|
||||||
|
git commit -m "feat(extract): add Text() dispatcher with md/txt passthrough"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Task 2: PDF extraction via pdftotext
|
||||||
|
|
||||||
|
**Files:**
|
||||||
|
- Modify: `ingestion/internal/extract/pdf.go`
|
||||||
|
- Modify: `ingestion/internal/extract/extract_test.go`
|
||||||
|
|
||||||
|
- [ ] **Step 1: Add PDF test (skip if pdftotext absent)**
|
||||||
|
|
||||||
|
Append to `extract_test.go`:
|
||||||
|
|
||||||
|
```go
|
||||||
|
func TestText_PDF(t *testing.T) {
|
||||||
|
if _, err := exec.LookPath("pdftotext"); err != nil {
|
||||||
|
t.Skip("pdftotext not available")
|
||||||
|
}
|
||||||
|
// Use a known PDF fixture; if none, create a minimal one via echo.
|
||||||
|
// The test verifies the round-trip: a PDF containing "Hello PDF" yields that string.
|
||||||
|
dir := t.TempDir()
|
||||||
|
pdfPath := filepath.Join(dir, "test.pdf")
|
||||||
|
|
||||||
|
// Generate a minimal single-page PDF using a here-doc approach.
|
||||||
|
// This is a valid minimal PDF containing the text "Hello PDF".
|
||||||
|
minimalPDF := "%PDF-1.4\n1 0 obj<</Type/Catalog/Pages 2 0 R>>endobj\n" +
|
||||||
|
"2 0 obj<</Type/Pages/Kids[3 0 R]/Count 1>>endobj\n" +
|
||||||
|
"3 0 obj<</Type/Page/MediaBox[0 0 612 792]/Parent 2 0 R/Contents 4 0 R/Resources<</Font<</F1<</Type/Font/Subtype/Type1/BaseFont/Helvetica>>>>>>>>endobj\n" +
|
||||||
|
"4 0 obj<</Length 44>>\nstream\nBT /F1 12 Tf 100 700 Td (Hello PDF) Tj ET\nendstream\nendobj\n" +
|
||||||
|
"xref\n0 5\n0000000000 65535 f\n0000000009 00000 n\n0000000058 00000 n\n0000000115 00000 n\n0000000310 00000 n\n" +
|
||||||
|
"trailer<</Size 5/Root 1 0 R>>\nstartxref\n406\n%%EOF\n"
|
||||||
|
require.NoError(t, os.WriteFile(pdfPath, []byte(minimalPDF), 0o644))
|
||||||
|
|
||||||
|
got, err := Text(pdfPath)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Contains(t, got, "Hello PDF")
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Add `"os/exec"` to imports in `extract_test.go`.
|
||||||
|
|
||||||
|
- [ ] **Step 2: Run to verify it fails (or skips)**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ingestion && go test ./internal/extract/... -v -run TestText_PDF
|
||||||
|
```
|
||||||
|
Expected: SKIP (pdftotext not installed locally) or FAIL with "not implemented".
|
||||||
|
|
||||||
|
- [ ] **Step 3: Implement pdf.go**
|
||||||
|
|
||||||
|
```go
|
||||||
|
// ingestion/internal/extract/pdf.go
|
||||||
|
package extract
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"fmt"
|
||||||
|
"os/exec"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// extractPDF runs pdftotext on path and returns the extracted text with
// leading and trailing whitespace removed.
// pdftotext must be installed (package: poppler-utils on Alpine/Debian, poppler on Homebrew).
//
// On failure the returned error wraps the underlying exec error, so callers
// can use errors.Is / errors.As (for example against exec.ErrNotFound when the
// binary is missing), and it includes whatever pdftotext wrote to stderr.
func extractPDF(path string) (string, error) {
	// A path beginning with '-' would be parsed by pdftotext as an option.
	// Anchoring it to the current directory keeps it a file name.
	arg := path
	if strings.HasPrefix(arg, "-") {
		arg = "./" + arg
	}

	var stdout, stderr bytes.Buffer
	cmd := exec.Command("pdftotext", "-q", arg, "-") // "-" writes the text to stdout
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr

	if err := cmd.Run(); err != nil {
		if msg := strings.TrimSpace(stderr.String()); msg != "" {
			return "", fmt.Errorf("pdftotext %s: %w: %s", path, err, msg)
		}
		return "", fmt.Errorf("pdftotext %s: %w", path, err)
	}

	return strings.TrimSpace(stdout.String()), nil
}
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] **Step 4: Run all extract tests**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ingestion && go test ./internal/extract/... -v
|
||||||
|
```
|
||||||
|
Expected: PASS (PDF test skips if pdftotext absent, passes if present).
|
||||||
|
|
||||||
|
- [ ] **Step 5: Commit**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ingestion && git add internal/extract/pdf.go internal/extract/extract_test.go
|
||||||
|
git commit -m "feat(extract): implement PDF extraction via pdftotext"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Task 3: `Entry.Aliases` + inventory reads aliases from frontmatter
|
||||||
|
|
||||||
|
**Files:**
|
||||||
|
- Modify: `ingestion/internal/wiki/types.go`
|
||||||
|
- Modify: `ingestion/internal/wiki/inventory.go`
|
||||||
|
- Modify: `ingestion/internal/wiki/inventory_test.go`
|
||||||
|
|
||||||
|
- [ ] **Step 1: Write failing test for alias loading**
|
||||||
|
|
||||||
|
Add to `inventory_test.go`:
|
||||||
|
|
||||||
|
```go
|
||||||
|
func TestLoadInventory_ReadsAliases(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
require.NoError(t, os.MkdirAll(filepath.Join(dir, "wiki", "entities"), 0o755))
|
||||||
|
require.NoError(t, os.MkdirAll(filepath.Join(dir, "wiki", "concepts"), 0o755))
|
||||||
|
require.NoError(t, os.MkdirAll(filepath.Join(dir, "wiki", "sources"), 0o755))
|
||||||
|
|
||||||
|
require.NoError(t, os.WriteFile(
|
||||||
|
filepath.Join(dir, "wiki", "entities", "ryan-singer.md"),
|
||||||
|
[]byte("---\ntitle: Ryan Singer\naliases:\n - Singer\n - R. Singer\n---\n\n## Description\n\nDesigner.\n"),
|
||||||
|
0o644,
|
||||||
|
))
|
||||||
|
|
||||||
|
inv, err := LoadInventory(dir)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
require.Len(t, inv[PageTypeEntity], 1)
|
||||||
|
e := inv[PageTypeEntity][0]
|
||||||
|
assert.Equal(t, "Ryan Singer", e.Title)
|
||||||
|
assert.Equal(t, []string{"Singer", "R. Singer"}, e.Aliases)
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] **Step 2: Run to verify it fails**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ingestion && go test ./internal/wiki/... -v -run TestLoadInventory_ReadsAliases
|
||||||
|
```
|
||||||
|
Expected: compile error — `Entry` has no `Aliases` field.
|
||||||
|
|
||||||
|
- [ ] **Step 3: Add Aliases to Entry in types.go**
|
||||||
|
|
||||||
|
```go
|
||||||
|
// Entry is a summary of an existing wiki page used to build the inventory.
|
||||||
|
type Entry struct {
|
||||||
|
// Slug identifies the page; the fuzzy resolver rewrites proposed paths to "<dir>/<Slug>.md".
Slug string
|
||||||
|
// Title is the frontmatter title; readFrontmatter falls back to the slug when it cannot read one.
Title string
|
||||||
|
// Aliases are alternative names read from the frontmatter "aliases" list; nil when the page declares none.
Aliases []string
|
||||||
|
// Type is the page type the entry was loaded under.
Type PageType
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] **Step 4: Replace readTitle with readFrontmatter in inventory.go**
|
||||||
|
|
||||||
|
Replace the `readTitle` function and its call site:
|
||||||
|
|
||||||
|
```go
|
||||||
|
// readFrontmatter extracts title and aliases from the YAML frontmatter of the
// file at path.
//
// It is a small line-based reader, not a YAML parser. It understands:
//
//	title: Some Title            (optionally quoted)
//	aliases:                     (block list, items optionally quoted)
//	  - First
//	  - "Second"
//	aliases: [First, "Second"]   (inline list; items must not contain commas)
//
// The frontmatter must be the first non-blank content of the file and is
// delimited by "---" lines. A "---" appearing later in the body is treated as
// a horizontal rule, not as metadata.
//
// title falls back to fallbackSlug when the file cannot be opened, has no
// frontmatter, or has a missing or empty title. aliases is nil when none are
// declared. The function never fails; it is best-effort by design.
func readFrontmatter(path, fallbackSlug string) (title string, aliases []string) {
	title = fallbackSlug
	f, err := os.Open(path)
	if err != nil {
		return title, nil
	}
	defer f.Close()

	// unquote trims surrounding whitespace and YAML quote characters.
	unquote := func(s string) string {
		return strings.Trim(strings.TrimSpace(s), `"'`)
	}

	scanner := bufio.NewScanner(f)
	inFM := false
	inAliases := false
	for scanner.Scan() {
		line := scanner.Text()
		trimmed := strings.TrimSpace(line)
		if trimmed == "" {
			continue // blank lines neither start nor end anything
		}
		if trimmed == "---" {
			if inFM {
				break // end of frontmatter
			}
			inFM = true
			continue
		}
		if !inFM {
			// Content before any opening delimiter: no frontmatter here.
			break
		}

		// Collect block-list items that follow an "aliases:" key.
		if inAliases {
			if trimmed == "-" || strings.HasPrefix(trimmed, "- ") {
				if alias := unquote(strings.TrimPrefix(trimmed, "-")); alias != "" {
					aliases = append(aliases, alias)
				}
				continue
			}
			inAliases = false // end of alias block
		}

		key, val, ok := strings.Cut(line, ":")
		if !ok {
			continue
		}
		switch strings.TrimSpace(key) {
		case "title":
			// An empty "title:" must not erase the slug fallback.
			if t := unquote(val); t != "" {
				title = t
			}
		case "aliases":
			val = strings.TrimSpace(val)
			if strings.HasPrefix(val, "[") {
				// Inline list on the same line as the key.
				inner := strings.TrimSuffix(strings.TrimPrefix(val, "["), "]")
				for _, item := range strings.Split(inner, ",") {
					if alias := unquote(item); alias != "" {
						aliases = append(aliases, alias)
					}
				}
				continue
			}
			inAliases = true
		}
	}
	// A scanner error (e.g. an over-long line) simply ends the scan; whatever
	// was read so far is returned, in keeping with the best-effort contract.
	return title, aliases
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Update `LoadInventory` to use `readFrontmatter`:
|
||||||
|
|
||||||
|
```go
|
||||||
|
title, aliases := readFrontmatter(path, slug)
|
||||||
|
result[pt] = append(result[pt], Entry{Slug: slug, Title: title, Aliases: aliases, Type: pt})
|
||||||
|
```
|
||||||
|
|
||||||
|
Remove the old `readTitle` function entirely.
|
||||||
|
|
||||||
|
- [ ] **Step 5: Run all wiki tests**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ingestion && go test ./internal/wiki/... -v
|
||||||
|
```
|
||||||
|
Expected: PASS — all existing tests plus new alias test.
|
||||||
|
|
||||||
|
- [ ] **Step 6: Commit**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ingestion && git add internal/wiki/types.go internal/wiki/inventory.go internal/wiki/inventory_test.go
|
||||||
|
git commit -m "feat(wiki): add Aliases to Entry and read from YAML frontmatter"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Task 4: Fuzzy entity resolution
|
||||||
|
|
||||||
|
**Files:**
|
||||||
|
- Create: `ingestion/internal/pipeline/resolve.go`
|
||||||
|
- Create: `ingestion/internal/pipeline/resolve_test.go`
|
||||||
|
|
||||||
|
- [ ] **Step 1: Write failing tests**
|
||||||
|
|
||||||
|
```go
|
||||||
|
// ingestion/internal/pipeline/resolve_test.go
|
||||||
|
package pipeline
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/wiki"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestResolve_NoMatch(t *testing.T) {
|
||||||
|
proposed := []wiki.Page{
|
||||||
|
{Path: "wiki/entities/new-person.md", Content: "---\ntitle: New Person\n---\n"},
|
||||||
|
}
|
||||||
|
inventory := map[wiki.PageType][]wiki.Entry{
|
||||||
|
wiki.PageTypeEntity: {
|
||||||
|
{Slug: "ryan-singer", Title: "Ryan Singer", Aliases: []string{"Singer"}},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
got := Resolve(proposed, inventory)
|
||||||
|
assert.Len(t, got, 1)
|
||||||
|
assert.Equal(t, "wiki/entities/new-person.md", got[0].Path)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestResolve_TitleMatchRedirectsSlug(t *testing.T) {
|
||||||
|
// Proposed slug differs from existing but title matches.
|
||||||
|
proposed := []wiki.Page{
|
||||||
|
{Path: "wiki/entities/ryan-singer-the-designer.md", Content: "---\ntitle: Ryan Singer\n---\n"},
|
||||||
|
}
|
||||||
|
inventory := map[wiki.PageType][]wiki.Entry{
|
||||||
|
wiki.PageTypeEntity: {
|
||||||
|
{Slug: "ryan-singer", Title: "Ryan Singer", Aliases: nil},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
got := Resolve(proposed, inventory)
|
||||||
|
assert.Len(t, got, 1)
|
||||||
|
assert.Equal(t, "wiki/entities/ryan-singer.md", got[0].Path)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestResolve_AliasMatchRedirectsSlug(t *testing.T) {
|
||||||
|
// Proposed title matches an existing alias.
|
||||||
|
proposed := []wiki.Page{
|
||||||
|
{Path: "wiki/entities/singer.md", Content: "---\ntitle: Singer\n---\n"},
|
||||||
|
}
|
||||||
|
inventory := map[wiki.PageType][]wiki.Entry{
|
||||||
|
wiki.PageTypeEntity: {
|
||||||
|
{Slug: "ryan-singer", Title: "Ryan Singer", Aliases: []string{"Singer", "R. Singer"}},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
got := Resolve(proposed, inventory)
|
||||||
|
assert.Len(t, got, 1)
|
||||||
|
assert.Equal(t, "wiki/entities/ryan-singer.md", got[0].Path)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestResolve_NormalizationCaseAndArticles(t *testing.T) {
|
||||||
|
// "the shape up method" normalizes to "shape up method" which matches "Shape Up Method".
|
||||||
|
proposed := []wiki.Page{
|
||||||
|
{Path: "wiki/concepts/the-shape-up-method.md", Content: "---\ntitle: The Shape Up Method\n---\n"},
|
||||||
|
}
|
||||||
|
inventory := map[wiki.PageType][]wiki.Entry{
|
||||||
|
wiki.PageTypeConcept: {
|
||||||
|
{Slug: "shape-up-method", Title: "Shape Up Method", Aliases: nil},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
got := Resolve(proposed, inventory)
|
||||||
|
assert.Len(t, got, 1)
|
||||||
|
assert.Equal(t, "wiki/concepts/shape-up-method.md", got[0].Path)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestResolve_OnlyMatchesSamePageType(t *testing.T) {
|
||||||
|
// A concept slug must not redirect to an entity with the same normalized name.
|
||||||
|
proposed := []wiki.Page{
|
||||||
|
{Path: "wiki/concepts/ryan-singer.md", Content: "---\ntitle: Ryan Singer\n---\n"},
|
||||||
|
}
|
||||||
|
inventory := map[wiki.PageType][]wiki.Entry{
|
||||||
|
wiki.PageTypeEntity: {
|
||||||
|
{Slug: "ryan-singer", Title: "Ryan Singer", Aliases: nil},
|
||||||
|
},
|
||||||
|
wiki.PageTypeConcept: {},
|
||||||
|
}
|
||||||
|
got := Resolve(proposed, inventory)
|
||||||
|
assert.Len(t, got, 1)
|
||||||
|
// Not redirected — different page type.
|
||||||
|
assert.Equal(t, "wiki/concepts/ryan-singer.md", got[0].Path)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestResolve_EmptyInventory(t *testing.T) {
|
||||||
|
proposed := []wiki.Page{
|
||||||
|
{Path: "wiki/entities/first.md", Content: "---\ntitle: First\n---\n"},
|
||||||
|
}
|
||||||
|
inventory := map[wiki.PageType][]wiki.Entry{}
|
||||||
|
got := Resolve(proposed, inventory)
|
||||||
|
assert.Equal(t, proposed, got)
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] **Step 2: Run to verify it fails**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ingestion && go test ./internal/pipeline/... -v -run TestResolve
|
||||||
|
```
|
||||||
|
Expected: compile error — `Resolve` not defined.
|
||||||
|
|
||||||
|
- [ ] **Step 3: Implement resolve.go**
|
||||||
|
|
||||||
|
```go
|
||||||
|
// ingestion/internal/pipeline/resolve.go
|
||||||
|
package pipeline
|
||||||
|
|
||||||
|
import (
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/wiki"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Resolve remaps proposed pages to existing slugs when a fuzzy title match is found.
|
||||||
|
// It only matches within the same page type (entities→entities, concepts→concepts).
|
||||||
|
// Pages with no inventory match are returned unchanged.
|
||||||
|
func Resolve(proposed []wiki.Page, inventory map[wiki.PageType][]wiki.Entry) []wiki.Page {
|
||||||
|
// Build normalized lookup: normalized_title → canonical slug, keyed by page type.
|
||||||
|
type key struct {
|
||||||
|
pt wiki.PageType
|
||||||
|
normalized string
|
||||||
|
}
|
||||||
|
lookup := make(map[key]string) // key → canonical slug
|
||||||
|
for pt, entries := range inventory {
|
||||||
|
for _, e := range entries {
|
||||||
|
k := key{pt: pt, normalized: normalizeTitle(e.Title)}
|
||||||
|
lookup[k] = e.Slug
|
||||||
|
for _, alias := range e.Aliases {
|
||||||
|
ak := key{pt: pt, normalized: normalizeTitle(alias)}
|
||||||
|
if _, exists := lookup[ak]; !exists {
|
||||||
|
lookup[ak] = e.Slug
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
out := make([]wiki.Page, 0, len(proposed))
|
||||||
|
for _, page := range proposed {
|
||||||
|
pt := pageTypeFromPath(page.Path)
|
||||||
|
title := extractTitle(page.Content)
|
||||||
|
k := key{pt: pt, normalized: normalizeTitle(title)}
|
||||||
|
if canonicalSlug, ok := lookup[k]; ok {
|
||||||
|
// Redirect path to canonical slug.
|
||||||
|
dir := filepath.Dir(page.Path)
|
||||||
|
page.Path = dir + "/" + canonicalSlug + ".md"
|
||||||
|
}
|
||||||
|
out = append(out, page)
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// normalizeTitle returns a canonical form of s for fuzzy title comparison:
// lowercased, with at most one leading article ("the", "a", "an") removed,
// hyphens turned into spaces, and runs of whitespace collapsed.
// "The Shape Up Method" → "shape up method"
//
// Only a single article is stripped, and only when something follows it, so
// "The A Team" becomes "a team" (not "team") and a bare "The" stays "the".
func normalizeTitle(s string) string {
	words := strings.Fields(strings.ToLower(s))
	if len(words) > 1 {
		switch words[0] {
		case "the", "a", "an":
			words = words[1:]
		}
	}
	// Hyphens are replaced after article stripping so that a hyphenated first
	// word such as "A-Team" is not mistaken for an article plus a noun.
	s = strings.ReplaceAll(strings.Join(words, " "), "-", " ")
	return strings.Join(strings.Fields(s), " ")
}
|
||||||
|
|
||||||
|
// pageTypeFromPath extracts the wiki.PageType from a path like "wiki/entities/foo.md".
|
||||||
|
func pageTypeFromPath(path string) wiki.PageType {
|
||||||
|
parts := strings.Split(filepath.ToSlash(path), "/")
|
||||||
|
if len(parts) >= 2 {
|
||||||
|
return wiki.PageType(parts[1])
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// extractTitle reads the title field from YAML frontmatter in content.
// Falls back to empty string if not found.
//
// The frontmatter must be the first non-blank content and is delimited by
// "---" lines. Only the first 30 lines are inspected. Surrounding whitespace
// and quotes are stripped from the value.
func extractTitle(content string) string {
	const maxLines = 30
	// Split into one extra element so the unsplit remainder of the document
	// can be dropped instead of being mistaken for line 30.
	lines := strings.SplitN(content, "\n", maxLines+1)
	if len(lines) > maxLines {
		lines = lines[:maxLines]
	}

	inFM := false
	for _, line := range lines {
		trimmed := strings.TrimSpace(line)
		if trimmed == "---" {
			if inFM {
				break // end of frontmatter
			}
			inFM = true
			continue
		}
		if !inFM {
			if trimmed == "" {
				continue
			}
			// Content before any opening delimiter: a later "---" is a
			// horizontal rule, not frontmatter.
			break
		}
		key, val, ok := strings.Cut(line, ":")
		if ok && strings.TrimSpace(key) == "title" {
			return strings.Trim(strings.TrimSpace(val), `"'`)
		}
	}
	return ""
}
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] **Step 4: Run resolve tests**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ingestion && go test ./internal/pipeline/... -v -run TestResolve
|
||||||
|
```
|
||||||
|
Expected: PASS — 6 tests passing.
|
||||||
|
|
||||||
|
- [ ] **Step 5: Commit**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ingestion && git add internal/pipeline/resolve.go internal/pipeline/resolve_test.go
|
||||||
|
git commit -m "feat(pipeline): add fuzzy entity resolution to prevent slug proliferation"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Task 5: Wire Resolve into pipeline.Run
|
||||||
|
|
||||||
|
**Files:**
|
||||||
|
- Modify: `ingestion/internal/pipeline/pipeline.go`
|
||||||
|
|
||||||
|
- [ ] **Step 1: Add Resolve call after ParsePages in Run()**
|
||||||
|
|
||||||
|
In `pipeline.go`, locate the loop that builds `allPages`. After `allPages = append(allPages, pages...)`, we have all pages from all chunks. Resolve must run after all chunks are merged, against the snapshot inventory loaded at the start of the run.
|
||||||
|
|
||||||
|
Replace the `merged := mergeAll(allPages)` line with:
|
||||||
|
|
||||||
|
```go
|
||||||
|
resolved := Resolve(allPages, inventory)
|
||||||
|
merged := mergeAll(resolved)
|
||||||
|
```
|
||||||
|
|
||||||
|
The full relevant section of `Run` after this change:
|
||||||
|
|
||||||
|
```go
|
||||||
|
for _, chunk := range chunks {
|
||||||
|
userPrompt := BuildPrompt(schema, source, chunk, inventory)
|
||||||
|
output, err := cfg.Complete(ctx, systemPrompt, userPrompt)
|
||||||
|
if err != nil {
|
||||||
|
return Result{}, fmt.Errorf("LLM call: %w", err)
|
||||||
|
}
|
||||||
|
pages, warnings := ParsePages(output)
|
||||||
|
allPages = append(allPages, pages...)
|
||||||
|
allWarnings = append(allWarnings, warnings...)
|
||||||
|
}
|
||||||
|
|
||||||
|
resolved := Resolve(allPages, inventory)
|
||||||
|
merged := mergeAll(resolved)
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] **Step 2: Run all pipeline tests**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ingestion && go test ./internal/pipeline/... -v
|
||||||
|
```
|
||||||
|
Expected: PASS — all existing tests still pass (Resolve is a no-op when inventory is empty or no title matches).
|
||||||
|
|
||||||
|
- [ ] **Step 3: Commit**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ingestion && git add internal/pipeline/pipeline.go
|
||||||
|
git commit -m "feat(pipeline): resolve proposed pages against inventory before writing"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Task 6: Wire extract.Text into watcher and handler
|
||||||
|
|
||||||
|
**Files:**
|
||||||
|
- Modify: `ingestion/internal/watcher/watcher.go`
|
||||||
|
- Modify: `ingestion/internal/api/handler.go`
|
||||||
|
|
||||||
|
- [ ] **Step 1: Update watcher.go**
|
||||||
|
|
||||||
|
In `processFile`, replace:
|
||||||
|
|
||||||
|
```go
|
||||||
|
content, err := os.ReadFile(path)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("read file: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
_, runErr := pipeline.Run(ctx, cfg.Pipeline, cfg.BrainDir, string(content), source, false)
|
||||||
|
```
|
||||||
|
|
||||||
|
With:
|
||||||
|
|
||||||
|
```go
|
||||||
|
content, err := extract.Text(path)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("extract text: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
_, runErr := pipeline.Run(ctx, cfg.Pipeline, cfg.BrainDir, content, source, false)
|
||||||
|
```
|
||||||
|
|
||||||
|
Add import: `"github.com/mathiasbq/hyperguild/ingestion/internal/extract"`
|
||||||
|
|
||||||
|
Keep the `"os"` import: after this change `os` is still used for `os.MkdirAll`, `os.WriteFile`, and `os.Stat`.
|
||||||
|
|
||||||
|
- [ ] **Step 2: Update handler.go — single-file path**
|
||||||
|
|
||||||
|
In `IngestPath`, the single-file branch reads:
|
||||||
|
|
||||||
|
```go
|
||||||
|
content, readErr := os.ReadFile(req.Path)
|
||||||
|
if readErr != nil {
|
||||||
|
writeError(w, http.StatusInternalServerError, fmt.Sprintf("read file: %v", readErr))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Replace with:
|
||||||
|
|
||||||
|
```go
|
||||||
|
content, readErr := extract.Text(req.Path)
|
||||||
|
if readErr != nil {
|
||||||
|
writeError(w, http.StatusInternalServerError, fmt.Sprintf("extract text: %v", readErr))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] **Step 3: Update handler.go — directory walk branch**
|
||||||
|
|
||||||
|
In `IngestPath`, the directory walk reads:
|
||||||
|
|
||||||
|
```go
|
||||||
|
content, readErr := os.ReadFile(path)
|
||||||
|
if readErr != nil {
|
||||||
|
allWarnings = append(allWarnings, fmt.Sprintf("read %s: %v", path, readErr))
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
source := req.Source
|
||||||
|
if source == "" {
|
||||||
|
source = filepath.Base(path)
|
||||||
|
}
|
||||||
|
result, runErr := pipeline.Run(r.Context(), h.pipeline, h.brainDir, string(content), source, req.DryRun)
|
||||||
|
```
|
||||||
|
|
||||||
|
Replace with:
|
||||||
|
|
||||||
|
```go
|
||||||
|
content, readErr := extract.Text(path)
|
||||||
|
if readErr != nil {
|
||||||
|
allWarnings = append(allWarnings, fmt.Sprintf("extract %s: %v", path, readErr))
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
source := req.Source
|
||||||
|
if source == "" {
|
||||||
|
source = filepath.Base(path)
|
||||||
|
}
|
||||||
|
result, runErr := pipeline.Run(r.Context(), h.pipeline, h.brainDir, content, source, req.DryRun)
|
||||||
|
```
|
||||||
|
|
||||||
|
Add import: `"github.com/mathiasbq/hyperguild/ingestion/internal/extract"` to handler.go.
|
||||||
|
|
||||||
|
- [ ] **Step 4: Build to verify no compile errors**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ingestion && go build ./...
|
||||||
|
```
|
||||||
|
Expected: success, no errors.
|
||||||
|
|
||||||
|
- [ ] **Step 5: Run all tests**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ingestion && go test ./...
|
||||||
|
```
|
||||||
|
Expected: PASS — all tests pass (watcher tests use .md files, already covered by extract passthrough).
|
||||||
|
|
||||||
|
- [ ] **Step 6: Commit**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ingestion && git add internal/watcher/watcher.go internal/api/handler.go
|
||||||
|
git commit -m "feat(watcher,api): use extract.Text() for file reading — fixes PDF ingestion"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Task 7: Add poppler-utils to Dockerfile
|
||||||
|
|
||||||
|
**Files:**
|
||||||
|
- Modify: `ingestion/Dockerfile`
|
||||||
|
|
||||||
|
- [ ] **Step 1: Add apk install for poppler-utils**
|
||||||
|
|
||||||
|
In `ingestion/Dockerfile`, add `poppler-utils` to the Alpine runtime stage. The current final stage is:
|
||||||
|
|
||||||
|
```dockerfile
|
||||||
|
FROM alpine:3.21
|
||||||
|
|
||||||
|
COPY --from=builder /out/ingestion /usr/local/bin/ingestion
|
||||||
|
|
||||||
|
RUN addgroup -S ingestion && adduser -S -G ingestion ingestion
|
||||||
|
```
|
||||||
|
|
||||||
|
Replace with:
|
||||||
|
|
||||||
|
```dockerfile
|
||||||
|
FROM alpine:3.21
|
||||||
|
|
||||||
|
RUN apk add --no-cache poppler-utils
|
||||||
|
|
||||||
|
COPY --from=builder /out/ingestion /usr/local/bin/ingestion
|
||||||
|
|
||||||
|
RUN addgroup -S ingestion && adduser -S -G ingestion ingestion
|
||||||
|
```
|
||||||
|
|
||||||
|
- [ ] **Step 2: Verify Dockerfile builds (local Docker)**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ingestion && docker build -t ingestion:test .
|
||||||
|
```
|
||||||
|
Expected: image builds successfully; `pdftotext` is available inside.
|
||||||
|
|
||||||
|
- [ ] **Step 3: Verify pdftotext is accessible in the image**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker run --rm ingestion:test pdftotext -v
|
||||||
|
```
|
||||||
|
Expected: prints version string like `pdftotext version 24.x.x`.
|
||||||
|
|
||||||
|
- [ ] **Step 4: Commit**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ingestion && git add Dockerfile
|
||||||
|
git commit -m "chore(docker): add poppler-utils for PDF text extraction"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Self-Review
|
||||||
|
|
||||||
|
**Spec coverage check:**
|
||||||
|
|
||||||
|
| Requirement | Task |
|
||||||
|
|---|---|
|
||||||
|
| PDF extraction via pdftotext | Tasks 2, 6, 7 |
|
||||||
|
| .md and .txt passthrough (no regression) | Task 1 |
|
||||||
|
| Unsupported extension → clear error | Task 1 |
|
||||||
|
| Entry.Aliases loaded from frontmatter | Task 3 |
|
||||||
|
| Fuzzy normalization (case, articles, hyphens) | Task 4 |
|
||||||
|
| Alias matching | Task 4 |
|
||||||
|
| Title matching across different proposed slugs | Task 4 |
|
||||||
|
| Cross-page-type isolation (concept ≠ entity) | Task 4 |
|
||||||
|
| Resolve wired into pipeline.Run | Task 5 |
|
||||||
|
| extract.Text wired into watcher | Task 6 |
|
||||||
|
| extract.Text wired into handler (single + dir) | Task 6 |
|
||||||
|
| Dockerfile includes poppler-utils | Task 7 |
|
||||||
|
|
||||||
|
**Placeholder scan:** None found.
|
||||||
|
|
||||||
|
**Type consistency:**
|
||||||
|
- `Resolve([]wiki.Page, map[wiki.PageType][]wiki.Entry) []wiki.Page` — consistent across Tasks 4 and 5.
|
||||||
|
- `extract.Text(path string) (string, error)` — consistent across Tasks 1, 2, and 6.
|
||||||
|
- `Entry.Aliases []string` — added in Task 3, used by Resolve in Task 4 (reads `e.Aliases`).
|
||||||
|
- `readFrontmatter` replaces `readTitle` entirely in Task 3 — no lingering `readTitle` calls.
|
||||||
@@ -15,6 +15,8 @@ RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 \
|
|||||||
|
|
||||||
FROM alpine:3.21
|
FROM alpine:3.21
|
||||||
|
|
||||||
|
RUN apk add --no-cache poppler-utils
|
||||||
|
|
||||||
COPY --from=builder /out/ingestion /usr/local/bin/ingestion
|
COPY --from=builder /out/ingestion /usr/local/bin/ingestion
|
||||||
|
|
||||||
RUN addgroup -S ingestion && adduser -S -G ingestion ingestion
|
RUN addgroup -S ingestion && adduser -S -G ingestion ingestion
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/extract"
|
||||||
"github.com/mathiasbq/hyperguild/ingestion/internal/pipeline"
|
"github.com/mathiasbq/hyperguild/ingestion/internal/pipeline"
|
||||||
"github.com/mathiasbq/hyperguild/ingestion/internal/search"
|
"github.com/mathiasbq/hyperguild/ingestion/internal/search"
|
||||||
)
|
)
|
||||||
@@ -214,16 +215,16 @@ func (h *Handler) IngestPath(w http.ResponseWriter, r *http.Request) {
|
|||||||
if !supportedExtensions[ext] {
|
if !supportedExtensions[ext] {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
content, readErr := os.ReadFile(path)
|
content, readErr := extract.Text(path)
|
||||||
if readErr != nil {
|
if readErr != nil {
|
||||||
allWarnings = append(allWarnings, fmt.Sprintf("read %s: %v", path, readErr))
|
allWarnings = append(allWarnings, fmt.Sprintf("extract %s: %v", path, readErr))
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
source := req.Source
|
source := req.Source
|
||||||
if source == "" {
|
if source == "" {
|
||||||
source = filepath.Base(path)
|
source = filepath.Base(path)
|
||||||
}
|
}
|
||||||
result, runErr := pipeline.Run(r.Context(), h.pipeline, h.brainDir, string(content), source, req.DryRun)
|
result, runErr := pipeline.Run(r.Context(), h.pipeline, h.brainDir, content, source, req.DryRun)
|
||||||
if runErr != nil {
|
if runErr != nil {
|
||||||
allWarnings = append(allWarnings, fmt.Sprintf("ingest %s: %v", path, runErr))
|
allWarnings = append(allWarnings, fmt.Sprintf("ingest %s: %v", path, runErr))
|
||||||
return nil
|
return nil
|
||||||
@@ -243,16 +244,16 @@ func (h *Handler) IngestPath(w http.ResponseWriter, r *http.Request) {
|
|||||||
writeError(w, http.StatusBadRequest, fmt.Sprintf("unsupported file extension: %s", ext))
|
writeError(w, http.StatusBadRequest, fmt.Sprintf("unsupported file extension: %s", ext))
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
content, readErr := os.ReadFile(req.Path)
|
content, readErr := extract.Text(req.Path)
|
||||||
if readErr != nil {
|
if readErr != nil {
|
||||||
writeError(w, http.StatusInternalServerError, fmt.Sprintf("read file: %v", readErr))
|
writeError(w, http.StatusInternalServerError, fmt.Sprintf("extract text: %v", readErr))
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
source := req.Source
|
source := req.Source
|
||||||
if source == "" {
|
if source == "" {
|
||||||
source = filepath.Base(req.Path)
|
source = filepath.Base(req.Path)
|
||||||
}
|
}
|
||||||
result, runErr := pipeline.Run(r.Context(), h.pipeline, h.brainDir, string(content), source, req.DryRun)
|
result, runErr := pipeline.Run(r.Context(), h.pipeline, h.brainDir, content, source, req.DryRun)
|
||||||
if runErr != nil {
|
if runErr != nil {
|
||||||
h.logger.Error("ingest-path failed", "path", req.Path, "err", runErr)
|
h.logger.Error("ingest-path failed", "path", req.Path, "err", runErr)
|
||||||
writeError(w, http.StatusInternalServerError, "ingest error")
|
writeError(w, http.StatusInternalServerError, "ingest error")
|
||||||
|
|||||||
39
ingestion/internal/extract/extract.go
Normal file
39
ingestion/internal/extract/extract.go
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
// ingestion/internal/extract/extract.go
|
||||||
|
package extract
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Text reads the file at path and returns its plain-text content.
|
||||||
|
// Supported extensions: .md, .txt (passthrough), .pdf (via pdftotext).
|
||||||
|
func Text(path string) (string, error) {
|
||||||
|
ext := strings.ToLower(fileExt(path))
|
||||||
|
switch ext {
|
||||||
|
case ".md", ".txt":
|
||||||
|
b, err := os.ReadFile(path)
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("read %s: %w", path, err)
|
||||||
|
}
|
||||||
|
return string(b), nil
|
||||||
|
case ".pdf":
|
||||||
|
return extractPDF(path)
|
||||||
|
default:
|
||||||
|
return "", fmt.Errorf("unsupported file extension: %s", ext)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// fileExt returns the extension of the final path element, including the
// leading dot and lowercased (e.g. "REPORT.PDF" → ".pdf").
//
// It scans backwards from the end of path: the first '.' found starts the
// extension, while hitting a '/' or '\\' first means the final element has no
// extension and "" is returned. An empty path also yields "".
func fileExt(path string) string {
	for i := len(path) - 1; i >= 0; i-- {
		switch path[i] {
		case '.':
			// Lowercase here so the result matches the documented contract
			// regardless of how the caller post-processes it.
			return strings.ToLower(path[i:])
		case '/', '\\':
			// Reached a directory separator before any dot: no extension.
			return ""
		}
	}
	return ""
}
|
||||||
62
ingestion/internal/extract/extract_test.go
Normal file
62
ingestion/internal/extract/extract_test.go
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
// ingestion/internal/extract/extract_test.go
|
||||||
|
package extract
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"os/exec"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestText_Markdown(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
path := filepath.Join(dir, "note.md")
|
||||||
|
require.NoError(t, os.WriteFile(path, []byte("# Hello\n\nWorld."), 0o644))
|
||||||
|
|
||||||
|
got, err := Text(path)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, "# Hello\n\nWorld.", got)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestText_Txt(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
path := filepath.Join(dir, "note.txt")
|
||||||
|
require.NoError(t, os.WriteFile(path, []byte("plain text"), 0o644))
|
||||||
|
|
||||||
|
got, err := Text(path)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, "plain text", got)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestText_UnsupportedExtension(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
path := filepath.Join(dir, "data.csv")
|
||||||
|
require.NoError(t, os.WriteFile(path, []byte("a,b,c"), 0o644))
|
||||||
|
|
||||||
|
_, err := Text(path)
|
||||||
|
assert.ErrorContains(t, err, "unsupported")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestText_PDF(t *testing.T) {
|
||||||
|
if _, err := exec.LookPath("pdftotext"); err != nil {
|
||||||
|
t.Skip("pdftotext not available")
|
||||||
|
}
|
||||||
|
dir := t.TempDir()
|
||||||
|
pdfPath := filepath.Join(dir, "test.pdf")
|
||||||
|
|
||||||
|
// Minimal valid PDF containing the text "Hello PDF".
|
||||||
|
minimalPDF := "%PDF-1.4\n1 0 obj<</Type/Catalog/Pages 2 0 R>>endobj\n" +
|
||||||
|
"2 0 obj<</Type/Pages/Kids[3 0 R]/Count 1>>endobj\n" +
|
||||||
|
"3 0 obj<</Type/Page/MediaBox[0 0 612 792]/Parent 2 0 R/Contents 4 0 R/Resources<</Font<</F1<</Type/Font/Subtype/Type1/BaseFont/Helvetica>>>>>>>>endobj\n" +
|
||||||
|
"4 0 obj<</Length 44>>\nstream\nBT /F1 12 Tf 100 700 Td (Hello PDF) Tj ET\nendstream\nendobj\n" +
|
||||||
|
"xref\n0 5\n0000000000 65535 f\n0000000009 00000 n\n0000000058 00000 n\n0000000115 00000 n\n0000000310 00000 n\n" +
|
||||||
|
"trailer<</Size 5/Root 1 0 R>>\nstartxref\n406\n%%EOF\n"
|
||||||
|
require.NoError(t, os.WriteFile(pdfPath, []byte(minimalPDF), 0o644))
|
||||||
|
|
||||||
|
got, err := Text(pdfPath)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Contains(t, got, "Hello PDF")
|
||||||
|
}
|
||||||
28
ingestion/internal/extract/pdf.go
Normal file
28
ingestion/internal/extract/pdf.go
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
// ingestion/internal/extract/pdf.go
|
||||||
|
package extract
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"fmt"
|
||||||
|
"os/exec"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// extractPDF runs pdftotext on path and returns the extracted text with
// leading and trailing whitespace trimmed.
//
// pdftotext must be installed (package: poppler-utils on Alpine/Debian,
// poppler on Homebrew).
//
// On failure the returned error wraps the error from running the command, so
// callers can use errors.Is(err, exec.ErrNotFound) to detect a missing binary
// or errors.As with *exec.ExitError to inspect the exit status. Anything the
// tool wrote to stderr is included in the message.
func extractPDF(path string) (string, error) {
	// A file name beginning with '-' would be parsed by pdftotext as an
	// option; anchoring it to the current directory keeps it a plain operand.
	if strings.HasPrefix(path, "-") {
		path = "./" + path
	}

	// "-" as the output operand sends the extracted text to stdout.
	// NOTE(review): -q asks pdftotext to suppress its messages, so stderr is
	// probably empty on most failures — confirm whether -q should stay.
	cmd := exec.Command("pdftotext", "-q", path, "-")
	var stdout, stderr bytes.Buffer
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr

	if err := cmd.Run(); err != nil {
		if msg := strings.TrimSpace(stderr.String()); msg != "" {
			return "", fmt.Errorf("pdftotext: %s: %w", msg, err)
		}
		return "", fmt.Errorf("pdftotext: %w", err)
	}

	return strings.TrimSpace(stdout.String()), nil
}
|
||||||
@@ -57,7 +57,8 @@ func Run(ctx context.Context, cfg Config, brainDir, content, source string, dryR
|
|||||||
allWarnings = append(allWarnings, warnings...)
|
allWarnings = append(allWarnings, warnings...)
|
||||||
}
|
}
|
||||||
|
|
||||||
merged := mergeAll(allPages)
|
resolved := Resolve(allPages, inventory)
|
||||||
|
merged := mergeAll(resolved)
|
||||||
|
|
||||||
date := time.Now().UTC().Format("2006-01-02")
|
date := time.Now().UTC().Format("2006-01-02")
|
||||||
var written []string
|
var written []string
|
||||||
|
|||||||
88
ingestion/internal/pipeline/resolve.go
Normal file
88
ingestion/internal/pipeline/resolve.go
Normal file
@@ -0,0 +1,88 @@
|
|||||||
|
// ingestion/internal/pipeline/resolve.go
|
||||||
|
package pipeline
|
||||||
|
|
||||||
|
import (
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/wiki"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Resolve remaps proposed pages to existing slugs when a fuzzy title match is found.
|
||||||
|
// It only matches within the same page type (entities→entities, concepts→concepts).
|
||||||
|
// Pages with no inventory match are returned unchanged.
|
||||||
|
func Resolve(proposed []wiki.Page, inventory map[wiki.PageType][]wiki.Entry) []wiki.Page {
|
||||||
|
type key struct {
|
||||||
|
pt wiki.PageType
|
||||||
|
normalized string
|
||||||
|
}
|
||||||
|
lookup := make(map[key]string) // key → canonical slug
|
||||||
|
for pt, entries := range inventory {
|
||||||
|
for _, e := range entries {
|
||||||
|
k := key{pt: pt, normalized: normalizeTitle(e.Title)}
|
||||||
|
lookup[k] = e.Slug
|
||||||
|
for _, alias := range e.Aliases {
|
||||||
|
ak := key{pt: pt, normalized: normalizeTitle(alias)}
|
||||||
|
if _, exists := lookup[ak]; !exists {
|
||||||
|
lookup[ak] = e.Slug
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
out := make([]wiki.Page, 0, len(proposed))
|
||||||
|
for _, page := range proposed {
|
||||||
|
pt := pageTypeFromPath(page.Path)
|
||||||
|
title := extractTitle(page.Content)
|
||||||
|
k := key{pt: pt, normalized: normalizeTitle(title)}
|
||||||
|
if canonicalSlug, ok := lookup[k]; ok {
|
||||||
|
dir := filepath.Dir(page.Path)
|
||||||
|
page.Path = dir + "/" + canonicalSlug + ".md"
|
||||||
|
}
|
||||||
|
out = append(out, page)
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// normalizeTitle produces the comparison key used for fuzzy title matching.
//
// It trims and lowercases s, removes at most one leading English article
// ("the", "a" or "an" followed by a space), turns hyphens into spaces and
// collapses every run of whitespace into a single space.
//
//	"The Shape Up Method" → "shape up method"
//	"The A Team"          → "a team"
//
// Only one article is removed so that a title such as "The A Team" does not
// degrade to "team" and collide with an unrelated page titled "Team".
func normalizeTitle(s string) string {
	s = strings.ToLower(strings.TrimSpace(s))
	for _, article := range []string{"the ", "a ", "an "} {
		if strings.HasPrefix(s, article) {
			s = s[len(article):]
			break // strip a single article only
		}
	}
	s = strings.ReplaceAll(s, "-", " ")
	return strings.Join(strings.Fields(s), " ")
}
|
||||||
|
|
||||||
|
// pageTypeFromPath extracts the wiki.PageType from a path like "wiki/entities/foo.md".
|
||||||
|
func pageTypeFromPath(path string) wiki.PageType {
|
||||||
|
parts := strings.Split(filepath.ToSlash(path), "/")
|
||||||
|
if len(parts) >= 2 {
|
||||||
|
return wiki.PageType(parts[1])
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// extractTitle returns the value of the "title" key found between the first
// pair of "---" fence lines in content, with surrounding whitespace and
// quote characters removed. It returns "" when no title is found before the
// closing fence.
//
// Only the first 29 lines are examined individually; the remainder of content
// is treated as a single trailing chunk.
func extractTitle(content string) string {
	opened := false
	for _, raw := range strings.SplitN(content, "\n", 30) {
		isFence := strings.TrimSpace(raw) == "---"
		switch {
		case isFence && opened:
			// Closing fence reached without seeing a title.
			return ""
		case isFence:
			opened = true
		case opened:
			name, value, found := strings.Cut(raw, ":")
			if found && strings.TrimSpace(name) == "title" {
				return strings.Trim(strings.TrimSpace(value), `"'`)
			}
		}
	}
	return ""
}
|
||||||
90
ingestion/internal/pipeline/resolve_test.go
Normal file
90
ingestion/internal/pipeline/resolve_test.go
Normal file
@@ -0,0 +1,90 @@
|
|||||||
|
// ingestion/internal/pipeline/resolve_test.go
|
||||||
|
package pipeline
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/wiki"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestResolve_NoMatch(t *testing.T) {
|
||||||
|
proposed := []wiki.Page{
|
||||||
|
{Path: "wiki/entities/new-person.md", Content: "---\ntitle: New Person\n---\n"},
|
||||||
|
}
|
||||||
|
inventory := map[wiki.PageType][]wiki.Entry{
|
||||||
|
wiki.PageTypeEntity: {
|
||||||
|
{Slug: "ryan-singer", Title: "Ryan Singer", Aliases: []string{"Singer"}},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
got := Resolve(proposed, inventory)
|
||||||
|
assert.Len(t, got, 1)
|
||||||
|
assert.Equal(t, "wiki/entities/new-person.md", got[0].Path)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestResolve_TitleMatchRedirectsSlug(t *testing.T) {
|
||||||
|
proposed := []wiki.Page{
|
||||||
|
{Path: "wiki/entities/ryan-singer-the-designer.md", Content: "---\ntitle: Ryan Singer\n---\n"},
|
||||||
|
}
|
||||||
|
inventory := map[wiki.PageType][]wiki.Entry{
|
||||||
|
wiki.PageTypeEntity: {
|
||||||
|
{Slug: "ryan-singer", Title: "Ryan Singer", Aliases: nil},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
got := Resolve(proposed, inventory)
|
||||||
|
assert.Len(t, got, 1)
|
||||||
|
assert.Equal(t, "wiki/entities/ryan-singer.md", got[0].Path)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestResolve_AliasMatchRedirectsSlug(t *testing.T) {
|
||||||
|
proposed := []wiki.Page{
|
||||||
|
{Path: "wiki/entities/singer.md", Content: "---\ntitle: Singer\n---\n"},
|
||||||
|
}
|
||||||
|
inventory := map[wiki.PageType][]wiki.Entry{
|
||||||
|
wiki.PageTypeEntity: {
|
||||||
|
{Slug: "ryan-singer", Title: "Ryan Singer", Aliases: []string{"Singer", "R. Singer"}},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
got := Resolve(proposed, inventory)
|
||||||
|
assert.Len(t, got, 1)
|
||||||
|
assert.Equal(t, "wiki/entities/ryan-singer.md", got[0].Path)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestResolve_NormalizationCaseAndArticles(t *testing.T) {
|
||||||
|
proposed := []wiki.Page{
|
||||||
|
{Path: "wiki/concepts/the-shape-up-method.md", Content: "---\ntitle: The Shape Up Method\n---\n"},
|
||||||
|
}
|
||||||
|
inventory := map[wiki.PageType][]wiki.Entry{
|
||||||
|
wiki.PageTypeConcept: {
|
||||||
|
{Slug: "shape-up-method", Title: "Shape Up Method", Aliases: nil},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
got := Resolve(proposed, inventory)
|
||||||
|
assert.Len(t, got, 1)
|
||||||
|
assert.Equal(t, "wiki/concepts/shape-up-method.md", got[0].Path)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestResolve_OnlyMatchesSamePageType(t *testing.T) {
|
||||||
|
proposed := []wiki.Page{
|
||||||
|
{Path: "wiki/concepts/ryan-singer.md", Content: "---\ntitle: Ryan Singer\n---\n"},
|
||||||
|
}
|
||||||
|
inventory := map[wiki.PageType][]wiki.Entry{
|
||||||
|
wiki.PageTypeEntity: {
|
||||||
|
{Slug: "ryan-singer", Title: "Ryan Singer", Aliases: nil},
|
||||||
|
},
|
||||||
|
wiki.PageTypeConcept: {},
|
||||||
|
}
|
||||||
|
got := Resolve(proposed, inventory)
|
||||||
|
assert.Len(t, got, 1)
|
||||||
|
assert.Equal(t, "wiki/concepts/ryan-singer.md", got[0].Path)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestResolve_EmptyInventory(t *testing.T) {
|
||||||
|
proposed := []wiki.Page{
|
||||||
|
{Path: "wiki/entities/first.md", Content: "---\ntitle: First\n---\n"},
|
||||||
|
}
|
||||||
|
inventory := map[wiki.PageType][]wiki.Entry{}
|
||||||
|
got := Resolve(proposed, inventory)
|
||||||
|
assert.Equal(t, proposed, got)
|
||||||
|
}
|
||||||
@@ -12,6 +12,7 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
"unicode"
|
"unicode"
|
||||||
|
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/extract"
|
||||||
"github.com/mathiasbq/hyperguild/ingestion/internal/pipeline"
|
"github.com/mathiasbq/hyperguild/ingestion/internal/pipeline"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -99,12 +100,12 @@ func processFile(ctx context.Context, cfg Config, path, date string) error {
|
|||||||
filename := filepath.Base(path)
|
filename := filepath.Base(path)
|
||||||
source := deriveSource(filename)
|
source := deriveSource(filename)
|
||||||
|
|
||||||
content, err := os.ReadFile(path)
|
content, err := extract.Text(path)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("read file: %w", err)
|
return fmt.Errorf("extract text: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
_, runErr := pipeline.Run(ctx, cfg.Pipeline, cfg.BrainDir, string(content), source, false)
|
_, runErr := pipeline.Run(ctx, cfg.Pipeline, cfg.BrainDir, content, source, false)
|
||||||
if runErr != nil {
|
if runErr != nil {
|
||||||
// Copy to failed/ and leave a .failed marker so we don't retry.
|
// Copy to failed/ and leave a .failed marker so we don't retry.
|
||||||
failedDir := filepath.Join(cfg.BrainDir, "raw", "failed")
|
failedDir := filepath.Join(cfg.BrainDir, "raw", "failed")
|
||||||
|
|||||||
@@ -32,23 +32,26 @@ func LoadInventory(brainDir string) (map[PageType][]Entry, error) {
|
|||||||
}
|
}
|
||||||
slug := strings.TrimSuffix(e.Name(), ".md")
|
slug := strings.TrimSuffix(e.Name(), ".md")
|
||||||
path := filepath.Join(dir, e.Name())
|
path := filepath.Join(dir, e.Name())
|
||||||
title := readTitle(path, slug)
|
title, aliases := readFrontmatter(path, slug)
|
||||||
result[pt] = append(result[pt], Entry{Slug: slug, Title: title, Type: pt})
|
result[pt] = append(result[pt], Entry{Slug: slug, Title: title, Aliases: aliases, Type: pt})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return result, nil
|
return result, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// readTitle extracts the title from YAML frontmatter, falling back to slug.
|
// readFrontmatter extracts title and aliases from YAML frontmatter.
|
||||||
func readTitle(path, fallback string) string {
|
// Falls back to slug for title and empty aliases on any error.
|
||||||
|
func readFrontmatter(path, fallbackSlug string) (title string, aliases []string) {
|
||||||
|
title = fallbackSlug
|
||||||
f, err := os.Open(path)
|
f, err := os.Open(path)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fallback
|
return
|
||||||
}
|
}
|
||||||
defer f.Close()
|
defer f.Close()
|
||||||
|
|
||||||
scanner := bufio.NewScanner(f)
|
scanner := bufio.NewScanner(f)
|
||||||
inFM := false
|
inFM := false
|
||||||
|
inAliases := false
|
||||||
for scanner.Scan() {
|
for scanner.Scan() {
|
||||||
line := scanner.Text()
|
line := scanner.Text()
|
||||||
if strings.TrimSpace(line) == "---" {
|
if strings.TrimSpace(line) == "---" {
|
||||||
@@ -56,14 +59,32 @@ func readTitle(path, fallback string) string {
|
|||||||
inFM = true
|
inFM = true
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
break
|
break // end of frontmatter
|
||||||
}
|
}
|
||||||
if inFM {
|
if !inFM {
|
||||||
key, val, ok := strings.Cut(line, ":")
|
continue
|
||||||
if ok && strings.TrimSpace(key) == "title" {
|
}
|
||||||
return strings.Trim(strings.TrimSpace(val), `"'`)
|
|
||||||
|
// Detect alias list items (lines starting with " - ").
|
||||||
|
if inAliases {
|
||||||
|
trimmed := strings.TrimSpace(line)
|
||||||
|
if strings.HasPrefix(trimmed, "- ") {
|
||||||
|
aliases = append(aliases, strings.TrimPrefix(trimmed, "- "))
|
||||||
|
continue
|
||||||
}
|
}
|
||||||
|
inAliases = false // end of alias block
|
||||||
|
}
|
||||||
|
|
||||||
|
key, val, ok := strings.Cut(line, ":")
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
switch strings.TrimSpace(key) {
|
||||||
|
case "title":
|
||||||
|
title = strings.Trim(strings.TrimSpace(val), `"'`)
|
||||||
|
case "aliases":
|
||||||
|
inAliases = true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return fallback
|
return
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -60,3 +60,24 @@ func TestLoadInventory_MissingDirsOk(t *testing.T) {
|
|||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
assert.NotNil(t, inv)
|
assert.NotNil(t, inv)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestLoadInventory_ReadsAliases(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
require.NoError(t, os.MkdirAll(filepath.Join(dir, "wiki", "entities"), 0o755))
|
||||||
|
require.NoError(t, os.MkdirAll(filepath.Join(dir, "wiki", "concepts"), 0o755))
|
||||||
|
require.NoError(t, os.MkdirAll(filepath.Join(dir, "wiki", "sources"), 0o755))
|
||||||
|
|
||||||
|
require.NoError(t, os.WriteFile(
|
||||||
|
filepath.Join(dir, "wiki", "entities", "ryan-singer.md"),
|
||||||
|
[]byte("---\ntitle: Ryan Singer\naliases:\n - Singer\n - R. Singer\n---\n\n## Description\n\nDesigner.\n"),
|
||||||
|
0o644,
|
||||||
|
))
|
||||||
|
|
||||||
|
inv, err := LoadInventory(dir)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
require.Len(t, inv[PageTypeEntity], 1)
|
||||||
|
e := inv[PageTypeEntity][0]
|
||||||
|
assert.Equal(t, "Ryan Singer", e.Title)
|
||||||
|
assert.Equal(t, []string{"Singer", "R. Singer"}, e.Aliases)
|
||||||
|
}
|
||||||
|
|||||||
@@ -21,7 +21,7 @@ func Slug(title string) string {
|
|||||||
case unicode.IsLetter(r) || unicode.IsDigit(r):
|
case unicode.IsLetter(r) || unicode.IsDigit(r):
|
||||||
b.WriteRune(r)
|
b.WriteRune(r)
|
||||||
prevHyphen = false
|
prevHyphen = false
|
||||||
// all other characters (apostrophes, colons, dots, etc.) are dropped
|
// all other characters (apostrophes, colons, dots, etc.) are dropped
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return strings.TrimRight(b.String(), "-")
|
return strings.TrimRight(b.String(), "-")
|
||||||
|
|||||||
@@ -18,7 +18,8 @@ type Page struct {
|
|||||||
|
|
||||||
// Entry is a summary of an existing wiki page used to build the inventory.
|
// Entry is a summary of an existing wiki page used to build the inventory.
|
||||||
type Entry struct {
|
type Entry struct {
|
||||||
Slug string
|
Slug string
|
||||||
Title string
|
Title string
|
||||||
Type PageType
|
Aliases []string
|
||||||
|
Type PageType
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user