121 lines
2.9 KiB
Go
121 lines
2.9 KiB
Go
// ingestion/internal/wiki/merge.go
|
|
package wiki
|
|
|
|
import (
|
|
"fmt"
|
|
"strings"
|
|
)
|
|
|
|
// bulletSections is the set of section headings that Merge combines with
// mergeBullets: the lines of both pages are unioned and duplicates dropped.
var bulletSections = map[string]bool{
	"Related Concepts":                 true,
	"Related Entities":                 true,
	"Sources":                          true,
	"Key Claims":                       true,
	"Entities Mentioned":               true,
	"Concepts Introduced or Reinforced": true,
	"Chapters":                         true,
}
|
|
|
|
// appendSections is the set of section headings that Merge combines by
// concatenation: the second page's text is added after the first page's.
var appendSections = map[string]bool{
	"Evolving Notes":        true,
	"Updates":               true,
	"Open Questions Raised": true,
	"Open Questions":        true,
}
|
|
|
|
// section is one "## "-level block of a markdown page.
type section struct {
	// heading is the text that follows the "## " marker.
	heading string
	// content is the text between this heading and the next one (or the
	// end of the page).
	content string
}
|
|
|
|
// Merge combines two Page values with the same path.
|
|
// Frontmatter is taken from a. Sections are merged by strategy:
|
|
// bullet sections union unique lines, append sections concatenate,
|
|
// all others keep a's version. Sections in b not present in a are appended.
|
|
func Merge(a, b Page) Page {
|
|
fmA, secsA := parseSections(a.Content)
|
|
_, secsB := parseSections(b.Content)
|
|
|
|
idx := make(map[string]int, len(secsA))
|
|
for i, s := range secsA {
|
|
idx[s.heading] = i
|
|
}
|
|
|
|
for _, sB := range secsB {
|
|
i, exists := idx[sB.heading]
|
|
if !exists {
|
|
idx[sB.heading] = len(secsA)
|
|
secsA = append(secsA, sB)
|
|
continue
|
|
}
|
|
sA := secsA[i]
|
|
switch {
|
|
case bulletSections[sB.heading]:
|
|
secsA[i].content = mergeBullets(sA.content, sB.content)
|
|
case appendSections[sB.heading]:
|
|
secsA[i].content = strings.TrimRight(sA.content, "\n") + "\n\n" + strings.TrimLeft(sB.content, "\n")
|
|
}
|
|
}
|
|
|
|
return Page{Path: a.Path, Content: rebuildContent(fmA, secsA)}
|
|
}
|
|
|
|
func parseSections(markdown string) (frontmatter string, sections []section) {
|
|
lines := strings.Split(markdown, "\n")
|
|
i := 0
|
|
|
|
if i < len(lines) && strings.TrimSpace(lines[i]) == "---" {
|
|
i++
|
|
var fmLines []string
|
|
for i < len(lines) {
|
|
if strings.TrimSpace(lines[i]) == "---" {
|
|
i++
|
|
break
|
|
}
|
|
fmLines = append(fmLines, lines[i])
|
|
i++
|
|
}
|
|
frontmatter = fmt.Sprintf("---\n%s\n---\n", strings.Join(fmLines, "\n"))
|
|
}
|
|
|
|
var cur *section
|
|
for ; i < len(lines); i++ {
|
|
line := lines[i]
|
|
if strings.HasPrefix(line, "## ") {
|
|
if cur != nil {
|
|
sections = append(sections, *cur)
|
|
}
|
|
cur = §ion{heading: strings.TrimPrefix(line, "## ")}
|
|
} else if cur != nil {
|
|
cur.content += line + "\n"
|
|
}
|
|
}
|
|
if cur != nil {
|
|
sections = append(sections, *cur)
|
|
}
|
|
return
|
|
}
|
|
|
|
func rebuildContent(frontmatter string, sections []section) string {
|
|
var sb strings.Builder
|
|
sb.WriteString(frontmatter)
|
|
for _, sec := range sections {
|
|
fmt.Fprintf(&sb, "\n## %s\n\n%s", sec.heading, sec.content)
|
|
}
|
|
return sb.String()
|
|
}
|
|
|
|
// mergeBullets returns the non-blank lines of a followed by those of b,
// keeping only the first occurrence of each line. Lines are compared after
// trimming surrounding whitespace, but the kept line retains its original
// indentation (a trailing carriage return is dropped). The result always
// ends with a newline; when both inputs are blank it is just "\n".
func mergeBullets(a, b string) string {
	seen := make(map[string]bool)
	var lines []string
	// Split each input on its own: concatenating first would fuse a's last
	// line with b's first line whenever a lacks a trailing newline.
	for _, text := range [...]string{a, b} {
		for _, line := range strings.Split(text, "\n") {
			trimmed := strings.TrimSpace(line)
			if trimmed == "" || seen[trimmed] {
				continue
			}
			seen[trimmed] = true
			lines = append(lines, strings.TrimRight(line, "\r"))
		}
	}
	return strings.Join(lines, "\n") + "\n"
}
|