// hyperguild/ingestion/internal/claudewatcher/scrubber_test.go

package claudewatcher

import (
	"testing"

	"github.com/stretchr/testify/assert"
)

func TestScrub_PoisonedFixtures(t *testing.T) {
	// One representative bad-string per rule. If a rule fires for the
	// wrong content shape later, this table localises the regression.
	cases := []struct {
		name    string
		content string
		want    string
	}{
		{"bearer-token", "curl -H 'Authorization: Bearer abcdef1234567890ghijklmnop'", "authorization-header"},
		{"bearer-no-header", "header = Bearer eyJhbGciOiJIUzI1NiJ9.payload.sig", "bearer-token"},
		{"postgres-uri", "DATABASE_URL=postgres://user:s3cret@10.0.1.20:5432/brain", "postgres-uri-with-password"},
		{"private-key", "-----BEGIN OPENSSH PRIVATE KEY-----\nb3BlbnNzaC1rZXktdjEAAAAA", "private-key"},
		{"ssh-public", "deploy: ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIK1234567890abcdefghij user@host", "ssh-key"},
		{"github-pat-classic", "GH_TOKEN=ghp_aBcD1234EfGh5678IjKl9012MnOp3456QrSt", "github-pat"},
		{"openai-key", "OPENAI_API_KEY=sk-proj-AAAABBBBCCCCDDDDEEEEFFFFGGGGHHHHIIII", "openai-sk"},
		{"anthropic-key", "ANTHROPIC_API_KEY=sk-ant-api03-aaaaBBBBccccDDDDeeeeFFFFggggHHHHiiiiJJJJkkkk", "anthropic-sk"},
		{"aws-access-key", "AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE", "aws-access-key"},
		{"homelab-env", "POSTGRES_PASSWORD=hunter2supersecretvalue", "homelab-env-token"},
		{"sops-marker", "value: ENC[AES256_GCM,data:abc123def456,iv:zzz]", "sops-encrypted-marker"},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := Scrub(tc.content)
			assert.Equal(t, tc.want, got)
		})
	}
}

func TestScrub_CleanContentPassesThrough(t *testing.T) {
	cases := []string{
		"",
		"plain text with no credentials",
		"a 40 char hex string aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa is fine in isolation",
		"`Bearer` token mentioned in docs without an actual value",
		"file at ~/.ssh/id_ed25519",
		"the function Authorization() takes no args",
		"comment: see API key in 1Password",
	}
	for _, c := range cases {
		assert.Empty(t, Scrub(c), "expected clean for %q", c)
	}
}

func TestScrub_FirstMatchWins(t *testing.T) {
	// Content matching multiple rules: report the first rule order in
	// DefaultRules. Stability matters for log triage.
	content := "Authorization: Bearer ghp_aBcD1234EfGh5678IjKl9012MnOp3456QrSt"
	assert.Equal(t, "authorization-header", Scrub(content))
}