Files
hyperguild/ingestion/internal/pipeline/prompt.go
Mathias Bergqvist 103f4d90bf feat(ingestion): add pipeline orchestrator with prompt builder
Adds prompt.go (BuildPrompt + systemPrompt) and pipeline.go (Run, Config,
Result, mergeAll) that wire chunking, LLM calls, parse, merge, index rebuild,
and log append into a single ingestion pipeline. Includes integration tests
covering write, dry-run, and duplicate-path merge scenarios.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-22 22:45:19 +02:00

61 lines
2.2 KiB
Go

// ingestion/internal/pipeline/prompt.go
package pipeline
import (
"fmt"
"strings"
"time"
"github.com/mathiasbq/hyperguild/ingestion/internal/wiki"
)
// systemPrompt is the fixed instruction text that frames the model as a wiki
// agent. It demands a bare JSON array as output, where each element carries a
// "path" and a "content" field, and restates the formatting rules (frontmatter,
// [[slug|Display Text]] wikilinks, YYYY-MM-DD dates, paraphrasing).
// NOTE(review): presumably sent as the system message alongside the output of
// BuildPrompt; the call site is not visible in this file, so confirm there.
const systemPrompt = `You are a wiki agent. Read the source material and produce structured wiki pages following the schema provided.
Output ONLY a valid JSON array — no markdown fences, no other text before or after.
Each element must have:
"path" — relative path within the wiki, e.g. "wiki/sources/foo.md"
"content" — full markdown content of the page including YAML frontmatter
Follow the schema strictly: correct frontmatter fields, wikilinks as [[slug|Display Text]],
dates in YYYY-MM-DD format, and paraphrase rather than quoting verbatim.`
// BuildPrompt renders the user prompt that accompanies one chunk of source text.
//
// The prompt is assembled in a fixed order: today's UTC date, the schema
// verbatim, the inventory of existing pages grouped by page type (concept,
// entity, source), the numbered output rules, and finally a heading naming
// source followed by content verbatim. A page type with no entries in
// inventory is listed as "(none yet)".
func BuildPrompt(schema, source, content string, inventory map[wiki.PageType][]wiki.Entry) string {
	var out strings.Builder

	today := time.Now().UTC().Format("2006-01-02")
	out.WriteString("Today's date is " + today + ".\n\n")
	out.WriteString("## Schema\n\n" + schema + "\n\n")
	out.WriteString("## Existing wiki pages\n\n")
	out.WriteString("Link ONLY to pages in this inventory or pages you are creating in this response.\n\n")

	// Walk the page types in a fixed order so the inventory sections are
	// emitted the same way on every call, independent of map iteration order.
	sections := []wiki.PageType{wiki.PageTypeConcept, wiki.PageTypeEntity, wiki.PageTypeSource}
	for _, section := range sections {
		// Capitalise the first byte of the page type name to form the heading.
		name := string(section)
		heading := strings.ToUpper(name[:1]) + name[1:]

		pages := inventory[section]
		switch len(pages) {
		case 0:
			out.WriteString(heading + " — (none yet)\n\n")
		default:
			out.WriteString(heading + " — link ONLY under the matching section:\n")
			for _, page := range pages {
				fmt.Fprintf(&out, " - [[%s|%s]]\n", page.Slug, page.Title)
			}
			out.WriteString("\n")
		}
	}

	// The rules are numbered from 1 in the order listed here.
	rules := []string{
		"Output ONLY a valid JSON array — no prose, no fences.",
		"Slugs are kebab-case: lowercase, spaces→hyphens, no special chars.",
		"Wikilinks: [[slug|Display Text]] — the pipe is required.",
		"Section links must match their section type.",
		"One source page per book — update it if inventory shows it exists.",
	}
	out.WriteString("## Non-negotiable rules\n\n")
	for i, rule := range rules {
		fmt.Fprintf(&out, "%d. %s\n", i+1, rule)
	}
	out.WriteString("\n")

	out.WriteString("## Source: " + source + "\n\n")
	out.WriteString(content)
	return out.String()
}