New package internal/claudewatcher. The volume gate (24 turns/week of agentsquad logs vs 500/week gate) exposed that the real signal lives in daily Claude Code usage at ~/.claude/projects/*/<uuid>.jsonl, not in agentsquad output. This package captures that signal. See infra#73 Track E + hyperguild#27 for the full reframe. Components: - parser: tolerant JSONL parser over the observed Claude Code session schema (user / assistant / attachment / system + bookkeeping types). Skip-flag fast-paths queue-operation, last-prompt, permission-mode, ai-title, bridge-session, file-history-snapshot. - scrubber: 11-rule fail-closed regex set for credential shapes (bearer, postgres URIs, PEM, ssh-key, ghp_/sk-/sk-ant-/AKIA, homelab env tokens, SOPS markers). Drop turn + log on match. - cursor: postgres-backed claude_session_cursors table, keyed by (host, file_path) with byte_offset. Resumable across pod restarts. - watcher: poll loop. Walks SessionsDir, processes each .jsonl from its cursor offset, runs scrubber, emits a Batch per file to a Sink interface, advances cursor on successful Ingest. No classifier integration in this commit — every kept turn is emitted in a per-session batch. The cmd/server wiring (next commit) routes batches to brain/wiki/claude-sessions/facts/. Classifier-driven hall routing (decisions / failures / hypotheses) is a follow-up. 19 unit tests across parser + scrubber + watcher. task check green. Refs: infra#73, hyperguild#27
58 lines
2.3 KiB
Go
58 lines
2.3 KiB
Go
package claudewatcher
|
|
|
|
import (
|
|
"testing"
|
|
|
|
"github.com/stretchr/testify/assert"
|
|
)
|
|
|
|
func TestScrub_PoisonedFixtures(t *testing.T) {
|
|
// One representative bad-string per rule. If a rule fires for the
|
|
// wrong content shape later, this table localises the regression.
|
|
cases := []struct {
|
|
name string
|
|
content string
|
|
want string
|
|
}{
|
|
{"bearer-token", "curl -H 'Authorization: Bearer abcdef1234567890ghijklmnop'", "authorization-header"},
|
|
{"bearer-no-header", "header = Bearer eyJhbGciOiJIUzI1NiJ9.payload.sig", "bearer-token"},
|
|
{"postgres-uri", "DATABASE_URL=postgres://user:s3cret@10.0.1.20:5432/brain", "postgres-uri-with-password"},
|
|
{"private-key", "-----BEGIN OPENSSH PRIVATE KEY-----\nb3BlbnNzaC1rZXktdjEAAAAA", "private-key"},
|
|
{"ssh-public", "deploy: ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIK1234567890abcdefghij user@host", "ssh-key"},
|
|
{"github-pat-classic", "GH_TOKEN=ghp_aBcD1234EfGh5678IjKl9012MnOp3456QrSt", "github-pat"},
|
|
{"openai-key", "OPENAI_API_KEY=sk-proj-AAAABBBBCCCCDDDDEEEEFFFFGGGGHHHHIIII", "openai-sk"},
|
|
{"anthropic-key", "ANTHROPIC_API_KEY=sk-ant-api03-aaaaBBBBccccDDDDeeeeFFFFggggHHHHiiiiJJJJkkkk", "anthropic-sk"},
|
|
{"aws-access-key", "AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE", "aws-access-key"},
|
|
{"homelab-env", "POSTGRES_PASSWORD=hunter2supersecretvalue", "homelab-env-token"},
|
|
{"sops-marker", "value: ENC[AES256_GCM,data:abc123def456,iv:zzz]", "sops-encrypted-marker"},
|
|
}
|
|
for _, tc := range cases {
|
|
t.Run(tc.name, func(t *testing.T) {
|
|
got := Scrub(tc.content)
|
|
assert.Equal(t, tc.want, got)
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestScrub_CleanContentPassesThrough(t *testing.T) {
|
|
cases := []string{
|
|
"",
|
|
"plain text with no credentials",
|
|
"a 40 char hex string aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa is fine in isolation",
|
|
"`Bearer` token mentioned in docs without an actual value",
|
|
"file at ~/.ssh/id_ed25519",
|
|
"the function Authorization() takes no args",
|
|
"comment: see API key in 1Password",
|
|
}
|
|
for _, c := range cases {
|
|
assert.Empty(t, Scrub(c), "expected clean for %q", c)
|
|
}
|
|
}
|
|
|
|
func TestScrub_FirstMatchWins(t *testing.T) {
|
|
// Content matching multiple rules: report the first rule order in
|
|
// DefaultRules. Stability matters for log triage.
|
|
content := "Authorization: Bearer ghp_aBcD1234EfGh5678IjKl9012MnOp3456QrSt"
|
|
assert.Equal(t, "authorization-header", Scrub(content))
|
|
}
|