feat(pipeline): replace ParsePages with ParseRawPages + RawPage type
Strips slug authority from the LLM. The new RawPage type carries only
{title, type, subtype, domain, content} — no paths or frontmatter.
Pipeline will derive slugs deterministically (Task 4).
pipeline.go gets a temporary bridge stub (TODO task4) to keep the
package compiling between tasks.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -5,13 +5,21 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/mathiasbq/hyperguild/ingestion/internal/wiki"
|
||||
)
|
||||
|
||||
// ParsePages parses LLM output as a JSON array of {path, content} objects.
|
||||
// RawPage is a single page exactly as the LLM emits it: a minimal set of
// structured fields carrying neither a path nor frontmatter. Slugs, paths,
// and frontmatter are derived from these fields by the pipeline.
type RawPage struct {
	// Title is decoded from the "title" key.
	Title string `json:"title"`

	// Type is one of "source", "concept", or "entity".
	Type string `json:"type"`

	// Subtype refines Type.
	// For an entity: person, company, tool, model, framework, or technology.
	// For a source: article, pdf, book, video, note, or project.
	Subtype string `json:"subtype"`

	// Domain is decoded from the "domain" key.
	Domain string `json:"domain"`

	// Content is the Markdown body only; it contains no frontmatter.
	Content string `json:"content"`
}
|
||||
|
||||
// ParseRawPages parses LLM output as a JSON array of RawPage objects.
|
||||
// If the array is truncated mid-object (token limit), it salvages all complete objects.
|
||||
func ParsePages(output string) ([]wiki.Page, []string) {
|
||||
func ParseRawPages(output string) ([]RawPage, []string) {
|
||||
output = strings.TrimSpace(output)
|
||||
if output == "" {
|
||||
return nil, []string{"LLM returned empty output"}
|
||||
@@ -19,7 +27,7 @@ func ParsePages(output string) ([]wiki.Page, []string) {
|
||||
|
||||
output = stripFences(output)
|
||||
|
||||
var pages []wiki.Page
|
||||
var pages []RawPage
|
||||
if err := json.Unmarshal([]byte(output), &pages); err == nil {
|
||||
return pages, nil
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user