feat(ingestion): add background file watcher for brain/raw/
Polls brain/raw/ on a configurable ticker, derives human-readable source names from filenames, runs the pipeline, and moves files to processed/YYYY-MM-DD/ on success or failed/ on error with a log.md entry. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
219
ingestion/internal/watcher/watcher_test.go
Normal file
219
ingestion/internal/watcher/watcher_test.go
Normal file
@@ -0,0 +1,219 @@
|
||||
// ingestion/internal/watcher/watcher_test.go
|
||||
package watcher
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/mathiasbq/hyperguild/ingestion/internal/pipeline"
|
||||
"github.com/mathiasbq/hyperguild/ingestion/internal/wiki"
|
||||
)
|
||||
|
||||
// successComplete returns a valid JSON-encoded page array for any call.
|
||||
func successComplete(page wiki.Page) pipeline.CompleteFunc {
|
||||
return func(ctx context.Context, system, user string) (string, error) {
|
||||
b, err := json.Marshal([]wiki.Page{page})
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return string(b), nil
|
||||
}
|
||||
}
|
||||
|
||||
// errorComplete always returns an error simulating an LLM failure.
|
||||
func errorComplete(_ context.Context, _, _ string) (string, error) {
|
||||
return "", fmt.Errorf("LLM unavailable")
|
||||
}
|
||||
|
||||
func setupBrainDir(t *testing.T) string {
|
||||
t.Helper()
|
||||
brainDir := t.TempDir()
|
||||
for _, sub := range []string{"wiki/concepts", "wiki/entities", "wiki/sources", "raw"} {
|
||||
require.NoError(t, os.MkdirAll(filepath.Join(brainDir, sub), 0o755))
|
||||
}
|
||||
return brainDir
|
||||
}
|
||||
|
||||
func TestStart_ProcessesFile(t *testing.T) {
|
||||
brainDir := setupBrainDir(t)
|
||||
|
||||
// Place a .md file in raw/.
|
||||
rawFile := filepath.Join(brainDir, "raw", "shape-up-book.md")
|
||||
require.NoError(t, os.WriteFile(rawFile, []byte("Content about Shape Up."), 0o644))
|
||||
|
||||
date := time.Now().UTC().Format("2006-01-02")
|
||||
wikiPage := wiki.Page{
|
||||
Path: "wiki/sources/shape-up-book.md",
|
||||
Content: "---\ntitle: Shape Up Book\ntype: article\ndomain: product-management\ndate_ingested: " + date + "\nlast_updated: " + date + "\naliases:\n - Shape Up Book\n---\n\n## Summary\n\nA book about Shape Up.\n",
|
||||
}
|
||||
|
||||
cfg := Config{
|
||||
BrainDir: brainDir,
|
||||
Interval: 50 * time.Millisecond,
|
||||
Pipeline: pipeline.Config{
|
||||
Complete: successComplete(wikiPage),
|
||||
ChunkSize: 0,
|
||||
Schema: "# Schema\nThree page types.",
|
||||
},
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
|
||||
defer cancel()
|
||||
|
||||
Start(ctx, cfg)
|
||||
|
||||
// Poll until the file is moved to processed/.
|
||||
processedPath := filepath.Join(brainDir, "raw", "processed", date, "shape-up-book.md")
|
||||
var found bool
|
||||
deadline := time.Now().Add(2 * time.Second)
|
||||
for time.Now().Before(deadline) {
|
||||
if _, err := os.Stat(processedPath); err == nil {
|
||||
found = true
|
||||
break
|
||||
}
|
||||
time.Sleep(20 * time.Millisecond)
|
||||
}
|
||||
require.True(t, found, "file should be moved to processed/")
|
||||
|
||||
// Original file should be gone.
|
||||
_, err := os.Stat(rawFile)
|
||||
assert.True(t, os.IsNotExist(err), "original file should be gone from raw/")
|
||||
|
||||
// Wiki page should exist.
|
||||
wikiPath := filepath.Join(brainDir, "wiki", "sources", "shape-up-book.md")
|
||||
_, err = os.Stat(wikiPath)
|
||||
assert.NoError(t, err, "wiki page should be written")
|
||||
|
||||
// log.md should contain an ingest record.
|
||||
logContent, err := os.ReadFile(filepath.Join(brainDir, "log.md"))
|
||||
require.NoError(t, err)
|
||||
assert.Contains(t, string(logContent), "— ingest")
|
||||
}
|
||||
|
||||
func TestStart_MovesToFailedOnError(t *testing.T) {
|
||||
brainDir := setupBrainDir(t)
|
||||
|
||||
rawFile := filepath.Join(brainDir, "raw", "bad-file.md")
|
||||
require.NoError(t, os.WriteFile(rawFile, []byte("Some content."), 0o644))
|
||||
|
||||
cfg := Config{
|
||||
BrainDir: brainDir,
|
||||
Interval: 50 * time.Millisecond,
|
||||
Pipeline: pipeline.Config{
|
||||
Complete: errorComplete,
|
||||
ChunkSize: 0,
|
||||
Schema: "# Schema\nThree page types.",
|
||||
},
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
|
||||
defer cancel()
|
||||
|
||||
Start(ctx, cfg)
|
||||
|
||||
// Poll until the file is moved to failed/.
|
||||
failedPath := filepath.Join(brainDir, "raw", "failed", "bad-file.md")
|
||||
var found bool
|
||||
deadline := time.Now().Add(2 * time.Second)
|
||||
for time.Now().Before(deadline) {
|
||||
if _, err := os.Stat(failedPath); err == nil {
|
||||
found = true
|
||||
break
|
||||
}
|
||||
time.Sleep(20 * time.Millisecond)
|
||||
}
|
||||
require.True(t, found, "file should be moved to failed/")
|
||||
|
||||
// Original file should be gone from raw/.
|
||||
_, err := os.Stat(rawFile)
|
||||
assert.True(t, os.IsNotExist(err), "original file should be gone from raw/")
|
||||
|
||||
// log.md should contain a watcher error entry.
|
||||
logContent, err := os.ReadFile(filepath.Join(brainDir, "log.md"))
|
||||
require.NoError(t, err)
|
||||
assert.Contains(t, string(logContent), "— watcher error")
|
||||
assert.Contains(t, string(logContent), "bad-file.md")
|
||||
}
|
||||
|
||||
func TestDeriveSource(t *testing.T) {
|
||||
tests := []struct {
|
||||
filename string
|
||||
want string
|
||||
}{
|
||||
{"shape-up-book.md", "Shape Up Book"},
|
||||
{"raft-consensus.txt", "Raft Consensus"},
|
||||
{"my-note.md", "My Note"},
|
||||
{"single.md", "Single"},
|
||||
{"no-extension", "No Extension"},
|
||||
}
|
||||
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.filename, func(t *testing.T) {
|
||||
got := deriveSource(tc.filename)
|
||||
assert.Equal(t, tc.want, got)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestProcessDir_SkipsSubdirs(t *testing.T) {
|
||||
brainDir := setupBrainDir(t)
|
||||
|
||||
// Create processed/ and failed/ subdirs with files inside.
|
||||
for _, sub := range []string{"processed/2026-04-22", "failed"} {
|
||||
require.NoError(t, os.MkdirAll(filepath.Join(brainDir, "raw", sub), 0o755))
|
||||
}
|
||||
|
||||
processedFile := filepath.Join(brainDir, "raw", "processed", "2026-04-22", "old-file.md")
|
||||
failedFile := filepath.Join(brainDir, "raw", "failed", "broken-file.md")
|
||||
require.NoError(t, os.WriteFile(processedFile, []byte("old"), 0o644))
|
||||
require.NoError(t, os.WriteFile(failedFile, []byte("broken"), 0o644))
|
||||
|
||||
// Also place a valid file in raw/ root that should be processed.
|
||||
validFile := filepath.Join(brainDir, "raw", "valid.md")
|
||||
require.NoError(t, os.WriteFile(validFile, []byte("valid content"), 0o644))
|
||||
|
||||
date := time.Now().UTC().Format("2006-01-02")
|
||||
|
||||
// Track which sources were passed to Complete.
|
||||
var processedSources []string
|
||||
completeFn := func(ctx context.Context, system, user string) (string, error) {
|
||||
// Record that this was called; return a minimal valid page.
|
||||
page := wiki.Page{
|
||||
Path: "wiki/sources/valid.md",
|
||||
Content: "---\ntitle: Valid\n---\n\n## Summary\n\nValid.\n",
|
||||
}
|
||||
b, _ := json.Marshal([]wiki.Page{page})
|
||||
processedSources = append(processedSources, "called")
|
||||
return string(b), nil
|
||||
}
|
||||
|
||||
cfg := Config{
|
||||
BrainDir: brainDir,
|
||||
Interval: time.Hour, // not used; we call processDir directly
|
||||
Pipeline: pipeline.Config{
|
||||
Complete: completeFn,
|
||||
ChunkSize: 0,
|
||||
Schema: "# Schema\nThree page types.",
|
||||
},
|
||||
}
|
||||
|
||||
errs := processDir(context.Background(), cfg, date)
|
||||
assert.Empty(t, errs, "no errors expected")
|
||||
|
||||
// Complete should have been called exactly once (for valid.md, not for files in subdirs).
|
||||
assert.Len(t, processedSources, 1, "only the file in raw/ root should be processed")
|
||||
|
||||
// Files in processed/ and failed/ must remain untouched.
|
||||
_, err := os.Stat(processedFile)
|
||||
assert.NoError(t, err, "processed subdir file should be untouched")
|
||||
_, err = os.Stat(failedFile)
|
||||
assert.NoError(t, err, "failed subdir file should be untouched")
|
||||
}
|
||||
Reference in New Issue
Block a user