package claudewatcher import ( "testing" "github.com/stretchr/testify/assert" ) func TestScrub_PoisonedFixtures(t *testing.T) { // One representative bad-string per rule. If a rule fires for the // wrong content shape later, this table localises the regression. cases := []struct { name string content string want string }{ {"bearer-token", "curl -H 'Authorization: Bearer abcdef1234567890ghijklmnop'", "authorization-header"}, {"bearer-no-header", "header = Bearer eyJhbGciOiJIUzI1NiJ9.payload.sig", "bearer-token"}, {"postgres-uri", "DATABASE_URL=postgres://user:s3cret@10.0.1.20:5432/brain", "postgres-uri-with-password"}, {"private-key", "-----BEGIN OPENSSH PRIVATE KEY-----\nb3BlbnNzaC1rZXktdjEAAAAA", "private-key"}, {"ssh-public", "deploy: ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIK1234567890abcdefghij user@host", "ssh-key"}, {"github-pat-classic", "GH_TOKEN=ghp_aBcD1234EfGh5678IjKl9012MnOp3456QrSt", "github-pat"}, {"openai-key", "OPENAI_API_KEY=sk-proj-AAAABBBBCCCCDDDDEEEEFFFFGGGGHHHHIIII", "openai-sk"}, {"anthropic-key", "ANTHROPIC_API_KEY=sk-ant-api03-aaaaBBBBccccDDDDeeeeFFFFggggHHHHiiiiJJJJkkkk", "anthropic-sk"}, {"aws-access-key", "AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE", "aws-access-key"}, {"homelab-env", "POSTGRES_PASSWORD=hunter2supersecretvalue", "homelab-env-token"}, {"sops-marker", "value: ENC[AES256_GCM,data:abc123def456,iv:zzz]", "sops-encrypted-marker"}, } for _, tc := range cases { t.Run(tc.name, func(t *testing.T) { got := Scrub(tc.content) assert.Equal(t, tc.want, got) }) } } func TestScrub_CleanContentPassesThrough(t *testing.T) { cases := []string{ "", "plain text with no credentials", "a 40 char hex string aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa is fine in isolation", "`Bearer` token mentioned in docs without an actual value", "file at ~/.ssh/id_ed25519", "the function Authorization() takes no args", "comment: see API key in 1Password", } for _, c := range cases { assert.Empty(t, Scrub(c), "expected clean for %q", c) } } func TestScrub_FirstMatchWins(t *testing.T) { // Content matching multiple rules: report the first rule order in // DefaultRules. Stability matters for log triage. content := "Authorization: Bearer ghp_aBcD1234EfGh5678IjKl9012MnOp3456QrSt" assert.Equal(t, "authorization-header", Scrub(content)) } func TestRegisterRule_ClientNameGuard(t *testing.T) { t.Cleanup(ResetExtraRules) require := func(err error) { if err != nil { t.Fatalf("unexpected err: %v", err) } } require(RegisterRule("client-name", `(?i)\b(SEB|Mastercard)\b`)) // Hits — case variations + word-boundary respect. for _, hit := range []string{ "mentioned SEB in this commit", "the Mastercard project deadline", "working on mastercard scope", "SEB internal review", } { assert.Equal(t, "client-name", Scrub(hit), "should match %q", hit) } // Misses — substring within a longer word should NOT match // thanks to \b. "Sebastian" contains "seb" but \b prevents hit. for _, miss := range []string{ "Sebastian wrote the docs", "unrelated text", "researcher", "https://example.com/search?seb=1", // 'seb' bounded by ?=, still matches \b } { got := Scrub(miss) if miss == "https://example.com/search?seb=1" { // `seb=` has word-boundary at '='; this DOES match \bseb\b. // Accept either outcome; document the tradeoff. assert.Contains(t, []string{"", "client-name"}, got) continue } assert.Empty(t, got, "should NOT match %q", miss) } } func TestRegisterRule_CredentialsTakePrecedence(t *testing.T) { t.Cleanup(ResetExtraRules) require := func(err error) { if err != nil { t.Fatalf("unexpected err: %v", err) } } require(RegisterRule("client-name", `\b(SEB)\b`)) // Content matches both a credential rule AND a client rule — // credential rule wins by ordering, so log triage points at the // strictly more dangerous leak. content := "SEB project uses OPENAI_API_KEY=sk-proj-AAAABBBBCCCCDDDDEEEEFFFFGGGGHHHHIIII" assert.Equal(t, "openai-sk", Scrub(content)) } func TestRegisterRule_RejectsInvalidPattern(t *testing.T) { t.Cleanup(ResetExtraRules) err := RegisterRule("bad", "[unclosed") assert.Error(t, err) }