feat(ingestion): add full-text wiki search package
Implements search.Query which walks brainDir/wiki/**/*.md, scores files by term-frequency across query tokens, and returns results sorted by score descending. Uses only stdlib — no external search deps. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
110
ingestion/internal/search/search.go
Normal file
110
ingestion/internal/search/search.go
Normal file
@@ -0,0 +1,110 @@
|
||||
// ingestion/internal/search/search.go
|
||||
package search
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Result describes one wiki page matched by Query.
type Result struct {
	// Path is the slash-separated location of the page relative to the
	// brain directory, e.g. "wiki/concepts/retry-logic.md".
	Path string `json:"path"`

	// Title is the page's frontmatter title, or its filename without the
	// ".md" suffix when no title is declared.
	Title string `json:"title"`

	// Excerpt is a short preview taken from the start of the page body.
	Excerpt string `json:"excerpt"`

	// Score is the total number of query-term occurrences found in the page.
	Score int `json:"score"`
}
|
||||
|
||||
// Query searches all .md files under brainDir/wiki/ for pages containing
|
||||
// any of the whitespace-separated terms in query. Returns up to limit results
|
||||
// sorted by score descending.
|
||||
func Query(brainDir, query string, limit int) ([]Result, error) {
|
||||
if limit <= 0 {
|
||||
limit = 5
|
||||
}
|
||||
terms := strings.Fields(strings.ToLower(query))
|
||||
if len(terms) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
var results []Result
|
||||
|
||||
err := filepath.WalkDir(filepath.Join(brainDir, "wiki"), func(path string, d os.DirEntry, err error) error {
|
||||
if err != nil || d.IsDir() || !strings.HasSuffix(path, ".md") {
|
||||
return err
|
||||
}
|
||||
|
||||
content, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return nil // skip unreadable files
|
||||
}
|
||||
|
||||
lower := strings.ToLower(string(content))
|
||||
score := 0
|
||||
for _, term := range terms {
|
||||
score += strings.Count(lower, term)
|
||||
}
|
||||
if score == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
rel, _ := filepath.Rel(brainDir, path)
|
||||
rel = filepath.ToSlash(rel)
|
||||
|
||||
results = append(results, Result{
|
||||
Path: rel,
|
||||
Title: extractTitle(string(content), d.Name()),
|
||||
Excerpt: excerpt(string(content), 300),
|
||||
Score: score,
|
||||
})
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
sort.Slice(results, func(i, j int) bool {
|
||||
return results[i].Score > results[j].Score
|
||||
})
|
||||
if len(results) > limit {
|
||||
results = results[:limit]
|
||||
}
|
||||
return results, nil
|
||||
}
|
||||
|
||||
// extractTitle returns the display title for a wiki page.
//
// The title is read from a "title:" entry in the YAML frontmatter, with
// surrounding whitespace and quote characters removed. Frontmatter is only
// recognised when "---" is the first non-blank line of content, so a
// horizontal rule later in the body is never mistaken for it. When there is
// no frontmatter, no title entry, or the title is empty, the filename without
// its ".md" suffix is returned instead.
func extractTitle(content, filename string) string {
	fallback := strings.TrimSuffix(filename, ".md")

	scanner := bufio.NewScanner(strings.NewReader(content))
	inFrontmatter := false
	for scanner.Scan() {
		line := strings.TrimSpace(scanner.Text())

		if !inFrontmatter {
			if line == "" {
				continue // tolerate blank lines before the opening fence
			}
			if line != "---" {
				// The document starts with body text: no frontmatter.
				return fallback
			}
			inFrontmatter = true
			continue
		}

		if line == "---" {
			break // closing fence reached without finding a title
		}

		key, val, ok := strings.Cut(line, ":")
		if !ok || strings.TrimSpace(key) != "title" {
			continue
		}
		if title := strings.Trim(strings.TrimSpace(val), `"'`); title != "" {
			return title
		}
	}
	// A scanner error (e.g. an over-long line) also lands here; the filename
	// is the best remaining answer.
	return fallback
}
|
||||
|
||||
// excerpt returns a short preview of a wiki page.
//
// If content opens with a YAML frontmatter section (a "---" line as the first
// non-blank line, closed by a later "---" line), the preview is taken from the
// whitespace-trimmed text after it; otherwise content is used as is. A "---"
// that appears elsewhere (horizontal rules, table separators, text inside a
// value) is not treated as a delimiter.
//
// When the text is longer than maxLen bytes it is cut to at most maxLen bytes,
// never in the middle of a UTF-8 sequence, and "…" is appended.
func excerpt(content string, maxLen int) string {
	body := content
	if rest, ok := afterFrontmatter(content); ok {
		body = strings.TrimSpace(rest)
	}
	if len(body) <= maxLen {
		return body
	}

	// Ranging over a string yields the byte offset at which each rune starts;
	// keep the largest such offset that does not exceed maxLen so the cut
	// lands on a rune boundary.
	cut := 0
	for i := range body {
		if i > maxLen {
			break
		}
		cut = i
	}
	return body[:cut] + "…"
}

// afterFrontmatter returns the text following a leading frontmatter section
// and true, or "" and false when content has no complete leading frontmatter.
func afterFrontmatter(content string) (string, bool) {
	opening, rest, found := strings.Cut(strings.TrimLeft(content, " \t\r\n"), "\n")
	if !found || strings.TrimSpace(opening) != "---" {
		return "", false
	}
	for rest != "" {
		var line string
		line, rest, _ = strings.Cut(rest, "\n")
		if strings.TrimSpace(line) == "---" {
			return rest, true
		}
	}
	return "", false // opening fence was never closed
}
|
||||
54
ingestion/internal/search/search_test.go
Normal file
54
ingestion/internal/search/search_test.go
Normal file
@@ -0,0 +1,54 @@
|
||||
// ingestion/internal/search/search_test.go
|
||||
package search_test
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/mathiasbq/hyperguild/ingestion/internal/search"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestSearch_ReturnsMatchingPages(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
require.NoError(t, os.MkdirAll(filepath.Join(dir, "wiki", "concepts"), 0o755))
|
||||
|
||||
// Write a concept page mentioning "retry"
|
||||
require.NoError(t, os.WriteFile(
|
||||
filepath.Join(dir, "wiki", "concepts", "retry-logic.md"),
|
||||
[]byte("---\ntitle: Retry Logic\ndomain: software\n---\n\nRetry logic handles transient failures by re-attempting operations.\n"),
|
||||
0o644,
|
||||
))
|
||||
// Write an unrelated page
|
||||
require.NoError(t, os.WriteFile(
|
||||
filepath.Join(dir, "wiki", "concepts", "database.md"),
|
||||
[]byte("---\ntitle: Database\ndomain: software\n---\n\nA database stores structured data.\n"),
|
||||
0o644,
|
||||
))
|
||||
|
||||
results, err := search.Query(dir, "retry transient", 5)
|
||||
require.NoError(t, err)
|
||||
require.Len(t, results, 1)
|
||||
assert.Equal(t, "wiki/concepts/retry-logic.md", results[0].Path)
|
||||
assert.Equal(t, "Retry Logic", results[0].Title)
|
||||
assert.Greater(t, results[0].Score, 0)
|
||||
assert.Contains(t, results[0].Excerpt, "Retry")
|
||||
}
|
||||
|
||||
func TestSearch_RespectsLimit(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
require.NoError(t, os.MkdirAll(filepath.Join(dir, "wiki", "concepts"), 0o755))
|
||||
for i := 0; i < 5; i++ {
|
||||
require.NoError(t, os.WriteFile(
|
||||
filepath.Join(dir, "wiki", "concepts", fmt.Sprintf("page-%d.md", i)),
|
||||
[]byte(fmt.Sprintf("---\ntitle: Page %d\n---\n\nThis page mentions retry.\n", i)),
|
||||
0o644,
|
||||
))
|
||||
}
|
||||
results, err := search.Query(dir, "retry", 3)
|
||||
require.NoError(t, err)
|
||||
assert.LessOrEqual(t, len(results), 3)
|
||||
}
|
||||
Reference in New Issue
Block a user