Adds prompt.go (BuildPrompt + systemPrompt) and pipeline.go (Run, Config, Result, mergeAll) that wire chunking, LLM calls, parse, merge, index rebuild, and log append into a single ingestion pipeline. Includes integration tests covering write, dry-run, and duplicate-path merge scenarios. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
61 lines
2.2 KiB
Go
61 lines
2.2 KiB
Go
// ingestion/internal/pipeline/prompt.go
|
|
package pipeline
|
|
|
|
import (
|
|
"fmt"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/mathiasbq/hyperguild/ingestion/internal/wiki"
|
|
)
|
|
|
|
// systemPrompt holds the fixed instructions that describe the model's role as
// a wiki agent and the required output format: a bare JSON array whose
// elements each carry a "path" and a "content" field.
// NOTE(review): the call site is not visible in this file; presumably this is
// sent as the system message alongside the output of BuildPrompt — confirm.
const systemPrompt = `You are a wiki agent. Read the source material and produce structured wiki pages following the schema provided.
|
|
|
|
Output ONLY a valid JSON array — no markdown fences, no other text before or after.
|
|
Each element must have:
|
|
"path" — relative path within the wiki, e.g. "wiki/sources/foo.md"
|
|
"content" — full markdown content of the page including YAML frontmatter
|
|
|
|
Follow the schema strictly: correct frontmatter fields, wikilinks as [[slug|Display Text]],
|
|
dates in YYYY-MM-DD format, and paraphrase rather than quoting verbatim.`
|
|
|
|
// BuildPrompt constructs the user prompt for a single chunk.
|
|
func BuildPrompt(schema, source, content string, inventory map[wiki.PageType][]wiki.Entry) string {
|
|
var sb strings.Builder
|
|
|
|
fmt.Fprintf(&sb, "Today's date is %s.\n\n", time.Now().UTC().Format("2006-01-02"))
|
|
|
|
sb.WriteString("## Schema\n\n")
|
|
sb.WriteString(schema)
|
|
sb.WriteString("\n\n")
|
|
|
|
sb.WriteString("## Existing wiki pages\n\n")
|
|
sb.WriteString("Link ONLY to pages in this inventory or pages you are creating in this response.\n\n")
|
|
|
|
for _, pt := range []wiki.PageType{wiki.PageTypeConcept, wiki.PageTypeEntity, wiki.PageTypeSource} {
|
|
entries := inventory[pt]
|
|
label := strings.ToUpper(string(pt)[:1]) + string(pt)[1:]
|
|
if len(entries) == 0 {
|
|
fmt.Fprintf(&sb, "%s — (none yet)\n\n", label)
|
|
continue
|
|
}
|
|
fmt.Fprintf(&sb, "%s — link ONLY under the matching section:\n", label)
|
|
for _, e := range entries {
|
|
fmt.Fprintf(&sb, " - [[%s|%s]]\n", e.Slug, e.Title)
|
|
}
|
|
sb.WriteString("\n")
|
|
}
|
|
|
|
sb.WriteString("## Non-negotiable rules\n\n")
|
|
sb.WriteString("1. Output ONLY a valid JSON array — no prose, no fences.\n")
|
|
sb.WriteString("2. Slugs are kebab-case: lowercase, spaces→hyphens, no special chars.\n")
|
|
sb.WriteString("3. Wikilinks: [[slug|Display Text]] — the pipe is required.\n")
|
|
sb.WriteString("4. Section links must match their section type.\n")
|
|
sb.WriteString("5. One source page per book — update it if inventory shows it exists.\n\n")
|
|
|
|
fmt.Fprintf(&sb, "## Source: %s\n\n", source)
|
|
sb.WriteString(content)
|
|
|
|
return sb.String()
|
|
}
|