// Chunk splits content into pieces of at most maxSize bytes, preferring to
// split at paragraph boundaries (a blank line, "\n\n").
//
// Content is trimmed of surrounding whitespace first. If maxSize <= 0, or the
// trimmed content already fits in maxSize bytes, it is returned unchanged as a
// single chunk (for empty content this is a single empty string).
//
// Otherwise each paragraph is trimmed, empty paragraphs are dropped, and
// consecutive paragraphs are packed greedily into a chunk, re-joined with
// "\n\n", for as long as the chunk stays within maxSize. A paragraph that is
// itself longer than maxSize is broken into several chunks (see
// splitOversizedParagraph) so the size bound always holds.
func Chunk(content string, maxSize int) []string {
	const sep = "\n\n"

	content = strings.TrimSpace(content)
	if maxSize <= 0 || len(content) <= maxSize {
		return []string{content}
	}

	var (
		chunks []string
		cur    strings.Builder
	)
	// flush emits the chunk under construction, if any, and starts a new one.
	flush := func() {
		if cur.Len() > 0 {
			chunks = append(chunks, cur.String())
			cur.Reset()
		}
	}

	for _, para := range strings.Split(content, sep) {
		para = strings.TrimSpace(para)
		if para == "" {
			continue
		}

		if len(para) > maxSize {
			// The paragraph cannot fit in any chunk on its own. Emit all but
			// its last piece as standalone chunks; pieces of one paragraph
			// are never re-joined with "\n\n", which would invent a paragraph
			// break. The last piece seeds the next chunk so that following
			// paragraphs can still be packed after it.
			flush()
			pieces := splitOversizedParagraph(para, maxSize)
			last := len(pieces) - 1
			chunks = append(chunks, pieces[:last]...)
			cur.WriteString(pieces[last])
			continue
		}

		// Start a new chunk when appending this paragraph (plus separator)
		// would push the current one past the limit.
		if cur.Len() > 0 && cur.Len()+len(sep)+len(para) > maxSize {
			flush()
		}
		if cur.Len() > 0 {
			cur.WriteString(sep)
		}
		cur.WriteString(para)
	}
	flush()
	return chunks
}

// splitOversizedParagraph breaks para, which must already be trimmed and
// longer than maxSize bytes (maxSize > 0), into trimmed, non-empty pieces of
// at most maxSize bytes each. It always returns at least two pieces.
//
// Each cut is placed, in order of preference: at the last ASCII whitespace
// character that keeps the piece within maxSize; at the last UTF-8 rune
// boundary within maxSize; or, when not even one rune fits (tiny maxSize or
// invalid UTF-8), at exactly maxSize bytes so the size bound is never broken.
func splitOversizedParagraph(para string, maxSize int) []string {
	var pieces []string
	for len(para) > maxSize {
		cut := maxSize
		// The window is one byte longer than maxSize so that whitespace
		// sitting exactly at the limit still yields a full-size piece.
		if i := strings.LastIndexAny(para[:maxSize+1], " \t\r\n"); i > 0 {
			cut = i
		} else {
			// Back up over UTF-8 continuation bytes (10xxxxxx) so the cut
			// does not land in the middle of a multi-byte rune.
			for cut > 0 && para[cut]&0xC0 == 0x80 {
				cut--
			}
			if cut == 0 {
				cut = maxSize
			}
		}
		pieces = append(pieces, strings.TrimSpace(para[:cut]))
		para = strings.TrimSpace(para[cut:])
	}
	return append(pieces, para)
}