71 lines
2.2 KiB
Go
71 lines
2.2 KiB
Go
// ingestion/internal/pipeline/links.go
|
|
package pipeline
|
|
|
|
import (
|
|
"fmt"
|
|
"path/filepath"
|
|
"regexp"
|
|
"strings"
|
|
|
|
"github.com/mathiasbq/hyperguild/ingestion/internal/wiki"
|
|
)
|
|
|
|
var (
	// plainLinkRE recognizes pipe-less wikilinks of the form [[Display Name]].
	//
	// Capture group 1 holds the text between the brackets. That text must be
	// non-empty and may contain neither ']' nor '|', so links that already
	// carry a slug ([[slug|Display]]) never match.
	plainLinkRE = regexp.MustCompile(`\[\[([^\]|]+)\]\]`)
)
|
|
|
|
// CanonicalizeLinks converts [[Display Name]] wikilinks to [[slug|Display Name]]
|
|
// using a title→slug map built from the inventory and current batch.
|
|
// Unknown titles are left as-is and returned as warnings.
|
|
func CanonicalizeLinks(pages []wiki.Page, inventory map[wiki.PageType][]wiki.Entry) ([]wiki.Page, []string) {
|
|
titleToSlug := buildTitleMap(pages, inventory)
|
|
|
|
var allWarnings []string
|
|
out := make([]wiki.Page, len(pages))
|
|
for i, p := range pages {
|
|
newContent, warnings := canonicalizeContent(p.Content, titleToSlug)
|
|
p.Content = newContent
|
|
out[i] = p
|
|
allWarnings = append(allWarnings, warnings...)
|
|
}
|
|
return out, allWarnings
|
|
}
|
|
|
|
// buildTitleMap builds a lowercase-title → slug map from inventory and current batch.
|
|
// Current batch entries take precedence over inventory (they may be updates).
|
|
func buildTitleMap(pages []wiki.Page, inventory map[wiki.PageType][]wiki.Entry) map[string]string {
|
|
m := make(map[string]string)
|
|
for _, entries := range inventory {
|
|
for _, e := range entries {
|
|
m[strings.ToLower(e.Title)] = e.Slug
|
|
}
|
|
}
|
|
// Current batch overrides inventory
|
|
for _, p := range pages {
|
|
title := extractTitle(p.Content)
|
|
slug := strings.TrimSuffix(filepath.Base(p.Path), ".md")
|
|
if title != "" && slug != "" {
|
|
m[strings.ToLower(title)] = slug
|
|
}
|
|
}
|
|
return m
|
|
}
|
|
|
|
func canonicalizeContent(content string, titleToSlug map[string]string) (string, []string) {
|
|
var warnings []string
|
|
result := plainLinkRE.ReplaceAllStringFunc(content, func(match string) string {
|
|
sub := plainLinkRE.FindStringSubmatch(match)
|
|
if len(sub) < 2 {
|
|
return match
|
|
}
|
|
displayName := sub[1]
|
|
slug, ok := titleToSlug[strings.ToLower(displayName)]
|
|
if !ok {
|
|
warnings = append(warnings, fmt.Sprintf("unknown wikilink: [[%s]]", displayName))
|
|
return match
|
|
}
|
|
return "[[" + slug + "|" + displayName + "]]"
|
|
})
|
|
return result, warnings
|
|
}
|