Compare commits
42 Commits
v0.3.0
...
c9310b1079
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c9310b1079 | ||
|
|
ca8a691241 | ||
|
|
214f607007 | ||
|
|
0e08dfffb8 | ||
|
|
caef05bea4 | ||
|
|
ca1a16873c | ||
|
|
63c238c650 | ||
|
|
ce45592730 | ||
|
|
823de23213 | ||
|
|
78d3939caa | ||
|
|
f2bc39b500 | ||
|
|
3625e1268d | ||
|
|
47df642836 | ||
|
|
235d70ad0b | ||
|
|
7d5289ac54 | ||
|
|
3d8fc9dacd | ||
|
|
f9f804cd49 | ||
|
|
85f142ade0 | ||
|
|
0dfad02513 | ||
|
|
c44eb680b2 | ||
|
|
38ada998a2 | ||
|
|
74547c2bdf | ||
|
|
587c0d3b1c | ||
|
|
bb61f2992b | ||
|
|
3ba72d9b28 | ||
|
|
b4f0fbc3ea | ||
|
|
12943ee6f4 | ||
|
|
9af95ebd96 | ||
|
|
f0b567f3e6 | ||
|
|
e3d6cf4cf5 | ||
|
|
df59bd010c | ||
|
|
e5152151d6 | ||
|
|
aa2d57e619 | ||
|
|
6b53706987 | ||
|
|
a0cfc866df | ||
|
|
7bf19b6a7b | ||
|
|
19b019a8d8 | ||
|
|
4ef6a22e28 | ||
|
|
3796cfca87 | ||
|
|
7ce544a051 | ||
|
|
391720155e | ||
|
|
ae6600b8d2 |
10
.dockerignore
Normal file
10
.dockerignore
Normal file
@@ -0,0 +1,10 @@
|
||||
.git
|
||||
.gitea
|
||||
.worktrees
|
||||
.DS_Store
|
||||
*.log
|
||||
.env*
|
||||
.vscode
|
||||
.idea
|
||||
bin/
|
||||
brain/
|
||||
90
.gitea/workflows/cd.yml
Normal file
90
.gitea/workflows/cd.yml
Normal file
@@ -0,0 +1,90 @@
|
||||
name: cd
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
|
||||
jobs:
|
||||
deploy:
|
||||
name: Build and deploy
|
||||
runs-on: self-hosted
|
||||
env:
|
||||
SERVICE: supervisor
|
||||
IMAGE: gitea.d-ma.be/mathias/supervisor
|
||||
INGESTION_IMAGE: gitea.d-ma.be/mathias/ingestion
|
||||
INFRA_REPO: git@gitea.d-ma.be:mathias/infra.git
|
||||
BUILDKIT_HOST: unix:///run/buildkit/buildkitd.sock
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Build and push supervisor image
|
||||
run: |
|
||||
set -e
|
||||
trap 'rm -f /tmp/supervisor-image.tar' EXIT
|
||||
IMAGE_TAG="${{ github.sha }}"
|
||||
echo "Building ${IMAGE}:${IMAGE_TAG}"
|
||||
|
||||
buildctl --addr "${BUILDKIT_HOST}" build \
|
||||
--frontend dockerfile.v0 \
|
||||
--local context=. \
|
||||
--local dockerfile=. \
|
||||
--opt build-arg:VERSION="${IMAGE_TAG}" \
|
||||
--output type=oci,dest=/tmp/supervisor-image.tar
|
||||
|
||||
skopeo copy \
|
||||
oci-archive:/tmp/supervisor-image.tar \
|
||||
docker://${IMAGE}:${IMAGE_TAG} \
|
||||
--dest-creds "${{ secrets.REGISTRY_CREDS }}"
|
||||
|
||||
echo "Built and pushed ${IMAGE}:${IMAGE_TAG}"
|
||||
|
||||
- name: Build and push ingestion image
|
||||
run: |
|
||||
set -e
|
||||
trap 'rm -f /tmp/ingestion-image.tar' EXIT
|
||||
IMAGE_TAG="${{ github.sha }}"
|
||||
echo "Building ${INGESTION_IMAGE}:${IMAGE_TAG}"
|
||||
|
||||
buildctl --addr "${BUILDKIT_HOST}" build \
|
||||
--frontend dockerfile.v0 \
|
||||
--local context=ingestion \
|
||||
--local dockerfile=ingestion \
|
||||
--output type=oci,dest=/tmp/ingestion-image.tar
|
||||
|
||||
skopeo copy \
|
||||
oci-archive:/tmp/ingestion-image.tar \
|
||||
docker://${INGESTION_IMAGE}:${IMAGE_TAG} \
|
||||
--dest-creds "${{ secrets.REGISTRY_CREDS }}"
|
||||
|
||||
echo "Built and pushed ${INGESTION_IMAGE}:${IMAGE_TAG}"
|
||||
|
||||
- name: Update infra repo
|
||||
run: |
|
||||
set -e
|
||||
trap 'rm -rf /tmp/infra-update; rm -f ~/.ssh/infra_deploy_key' EXIT
|
||||
IMAGE_TAG="${{ github.sha }}"
|
||||
mkdir -p ~/.ssh
|
||||
echo "${{ secrets.INFRA_DEPLOY_KEY }}" > ~/.ssh/infra_deploy_key
|
||||
chmod 600 ~/.ssh/infra_deploy_key
|
||||
printf 'Host gitea.d-ma.be\n HostName 127.0.0.1\n Port 30022\n StrictHostKeyChecking no\n' >> ~/.ssh/config
|
||||
|
||||
GIT_SSH_COMMAND="ssh -i ~/.ssh/infra_deploy_key -o IdentitiesOnly=yes" \
|
||||
git clone "${INFRA_REPO}" /tmp/infra-update
|
||||
|
||||
cd /tmp/infra-update
|
||||
|
||||
sed -i "s|gitea.d-ma.be/mathias/supervisor:.*|gitea.d-ma.be/mathias/supervisor:${IMAGE_TAG}|" \
|
||||
"k3s/apps/${SERVICE}/deployment.yaml"
|
||||
|
||||
sed -i "s|gitea.d-ma.be/mathias/ingestion:.*|gitea.d-ma.be/mathias/ingestion:${IMAGE_TAG}|" \
|
||||
"k3s/apps/${SERVICE}/ingestion-deployment.yaml"
|
||||
|
||||
git config user.email "cd-bot@d-ma.be"
|
||||
git config user.name "CD Bot"
|
||||
git add "k3s/apps/${SERVICE}/deployment.yaml" "k3s/apps/${SERVICE}/ingestion-deployment.yaml"
|
||||
git commit -m "chore(deploy): ${SERVICE}+ingestion → ${IMAGE_TAG}"
|
||||
GIT_SSH_COMMAND="ssh -i ~/.ssh/infra_deploy_key -o IdentitiesOnly=yes" \
|
||||
git push
|
||||
|
||||
echo "Infra repo updated: ${SERVICE}+ingestion → ${IMAGE_TAG}"
|
||||
10
.mcp.json
Normal file
10
.mcp.json
Normal file
@@ -0,0 +1,10 @@
|
||||
{
|
||||
"mcpServers": {
|
||||
"supervisor": {
|
||||
"command": "/Users/mathias/dev/AI/supervisor/bin/supervisor-bridge",
|
||||
"env": {
|
||||
"SUPERVISOR_URL": "http://koala:30320/mcp"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
23
DECISIONS.md
23
DECISIONS.md
@@ -44,6 +44,29 @@ Record *why* things are the way they are. Future-you will thank present-you.
|
||||
|
||||
**Consequences**: More operational complexity than Chroma, but isolation is non-negotiable for client work.
|
||||
|
||||
## 2026-04-22 — Hyperguild scope reset: drop parametric learning, simplify brain
|
||||
|
||||
**Context**: After shipping Phases 1–4 (MCP server, 6 skills, model orchestration, session logging, CD pipeline), we critically reviewed what was theater vs genuinely useful.
|
||||
|
||||
**Decisions**:
|
||||
|
||||
1. **Drop the parametric learning pipeline.** SFT/DPO/RL extraction, `brain/training-data/` directory structure, Axolotl/LLaMA-Factory fine-tuning loop — all cut. The loop requires thousands of high-quality examples to move the needle, which a solo consultant won't generate. Better base models ship faster than any fine-tuning effort could keep up with. Fine-tuning is a research project, not a productivity tool.
|
||||
|
||||
2. **Simplify the brain to plain markdown.** `brain/knowledge/` replaces `brain/wiki/ + brain/raw/ + brain/training-data/`. The trainer and retrospective workers write markdown entries. `brain_query` searches markdown. No ingestion pipeline, no tagging for significance review, no structured JSONL formats.
|
||||
|
||||
3. **Measure the escalation chain before assuming it's useful.** The local model (phi4) only belongs in a skill's chain if it passes Claude verification at a meaningful rate. Where it fails >70% of the time, it adds cost, not value. Per-skill hit-rate logging is the prerequisite to honest chain configuration.
|
||||
|
||||
4. **Keep what's real**: MCP tool surface, session logging with attempt records, tier detection, CD pipeline, bridge to Claude Code.
|
||||
|
||||
**What to build next** (in priority order):
|
||||
- `brain_query` injection into skill handlers before spawning workers — this makes the declarative brain actually function
|
||||
- `protocols.md` — behavioral contract injected into every worker prompt
|
||||
- Per-skill pass rate logging and chain tuning
|
||||
|
||||
**Consequences**: Simpler system with a shorter feedback loop. The brain becomes real only when skill handlers query it. Training data ambitions deferred indefinitely — revisit if local model capabilities improve enough that fine-tuning becomes worthwhile.
|
||||
|
||||
---
|
||||
|
||||
## 2026-04-08 — Mistral Vibe gets its own adapter
|
||||
|
||||
**Context**: Vibe doesn't read `AGENTS.md` — it uses `~/.vibe/prompts/` and `~/.vibe/agents/` with TOML config.
|
||||
|
||||
50
Dockerfile
Normal file
50
Dockerfile
Normal file
@@ -0,0 +1,50 @@
|
||||
# syntax=docker/dockerfile:1
|
||||
|
||||
# ── Build stage ───────────────────────────────────────────────────────────────
|
||||
FROM golang:1.26-bookworm AS builder
|
||||
|
||||
ARG VERSION=dev
|
||||
WORKDIR /src
|
||||
|
||||
COPY go.mod go.sum ./
|
||||
RUN go mod download
|
||||
|
||||
COPY . .
|
||||
RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 \
|
||||
go build -trimpath -ldflags="-s -w -X main.version=${VERSION}" \
|
||||
-o /out/supervisor ./cmd/supervisor
|
||||
|
||||
# ── Runtime stage ─────────────────────────────────────────────────────────────
|
||||
# Node.js 22 slim — needed for claude CLI subprocess
|
||||
FROM node:22-slim
|
||||
|
||||
# Install claude CLI (provides the `claude` binary the supervisor shells out to)
|
||||
RUN npm install -g @anthropic-ai/claude-code \
|
||||
&& claude --version \
|
||||
&& echo "claude CLI installed"
|
||||
|
||||
# Copy supervisor binary
|
||||
COPY --from=builder /out/supervisor /usr/local/bin/supervisor
|
||||
|
||||
# Bake in config (models.yaml + skill discipline files)
|
||||
COPY config/ /app/config/
|
||||
|
||||
# Run as non-root
|
||||
RUN groupadd -r supervisor && useradd -r -g supervisor -d /app supervisor
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# brain/ is writable state — mount a PersistentVolume here
|
||||
VOLUME /app/brain
|
||||
|
||||
ENV SUPERVISOR_CONFIG_DIR=/app/config/supervisor
|
||||
ENV SUPERVISOR_MODELS_FILE=/app/config/models.yaml
|
||||
ENV SUPERVISOR_BRAIN_DIR=/app/brain
|
||||
ENV SUPERVISOR_SESSIONS_DIR=/app/brain/sessions
|
||||
ENV SUPERVISOR_PORT=3200
|
||||
|
||||
USER supervisor
|
||||
|
||||
EXPOSE 3200
|
||||
|
||||
ENTRYPOINT ["/usr/local/bin/supervisor"]
|
||||
@@ -37,12 +37,17 @@ func main() {
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
systemPrompt, err := os.ReadFile(cfg.ConfigDir + "/CLAUDE.md")
|
||||
protocolsPrompt, err := os.ReadFile(cfg.ConfigDir + "/protocols.md")
|
||||
if err != nil {
|
||||
logger.Error("read supervisor CLAUDE.md", "path", cfg.ConfigDir+"/CLAUDE.md", "err", err)
|
||||
logger.Error("read protocols.md", "path", cfg.ConfigDir+"/protocols.md", "err", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
// prependProtocols prepends the shared protocols to a skill discipline file.
|
||||
prependProtocols := func(skillPrompt []byte) string {
|
||||
return string(protocolsPrompt) + "\n---\n\n" + string(skillPrompt)
|
||||
}
|
||||
|
||||
tddPrompt, err := os.ReadFile(cfg.ConfigDir + "/tdd.md")
|
||||
if err != nil {
|
||||
logger.Error("read tdd.md", "path", cfg.ConfigDir+"/tdd.md", "err", err)
|
||||
@@ -84,26 +89,7 @@ func main() {
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
claudeExec := iexec.New(iexec.Config{
|
||||
SystemPrompt: string(systemPrompt),
|
||||
LiteLLMBaseURL: cfg.LiteLLMBaseURL,
|
||||
LiteLLMAPIKey: cfg.LiteLLMAPIKey,
|
||||
})
|
||||
litellmExec := iexec.NewLiteLLM(cfg.LiteLLMBaseURL, cfg.LiteLLMAPIKey, 0)
|
||||
verifier := iexec.NewVerifier("", models.Verifier(), 0)
|
||||
|
||||
buildOrch := func(skill string) func(ctx context.Context, req iexec.Request) (iexec.Result, error) {
|
||||
return func(ctx context.Context, req iexec.Request) (iexec.Result, error) {
|
||||
rawChain := models.ChainFor(skill, req.Model)
|
||||
chain := make([]iexec.ChainEntry, len(rawChain))
|
||||
for i, m := range rawChain {
|
||||
chain[i] = iexec.EntryFor(m)
|
||||
}
|
||||
attempts := make([]iexec.AttemptRecord, 0, len(chain))
|
||||
orch := iexec.NewOrchestrator(chain, litellmExec.Run, claudeExec.Run, verifier, models.LlamaSwapURL(), &attempts)
|
||||
return orch.Run(ctx, req)
|
||||
}
|
||||
}
|
||||
litellm := iexec.NewLiteLLM(cfg.LiteLLMBaseURL, cfg.LiteLLMAPIKey, 0)
|
||||
|
||||
tierFn := func(ctx context.Context) tier.Info {
|
||||
return tier.Detect(ctx, "https://api.anthropic.com", cfg.LiteLLMBaseURL)
|
||||
@@ -111,11 +97,11 @@ func main() {
|
||||
|
||||
reg := registry.New()
|
||||
reg.Register(tdd.New(tdd.Config{
|
||||
SystemPrompt: string(systemPrompt),
|
||||
SkillPrompt: string(tddPrompt),
|
||||
DefaultModel: models.ChainFor("tdd", "")[0],
|
||||
ExecutorFn: buildOrch("tdd"),
|
||||
SessionsDir: cfg.SessionsDir,
|
||||
SkillPrompt: prependProtocols(tddPrompt),
|
||||
DefaultModel: models.ModelFor("tdd", ""),
|
||||
CompleteFunc: litellm.Complete,
|
||||
SessionsDir: cfg.SessionsDir,
|
||||
IngestBaseURL: cfg.IngestBaseURL,
|
||||
}))
|
||||
reg.Register(brain.New(brain.Config{
|
||||
IngestBaseURL: cfg.IngestBaseURL,
|
||||
@@ -127,34 +113,37 @@ func main() {
|
||||
SessionsDir: cfg.SessionsDir,
|
||||
}))
|
||||
reg.Register(retrospective.New(retrospective.Config{
|
||||
SkillPrompt: string(retroPrompt),
|
||||
DefaultModel: models.ChainFor("retrospective", "")[0],
|
||||
SkillPrompt: prependProtocols(retroPrompt),
|
||||
DefaultModel: models.ModelFor("retrospective", ""),
|
||||
SessionsDir: cfg.SessionsDir,
|
||||
ExecutorFn: buildOrch("retrospective"),
|
||||
CompleteFunc: litellm.Complete,
|
||||
}))
|
||||
reg.Register(review.New(review.Config{
|
||||
SkillPrompt: string(reviewPrompt),
|
||||
DefaultModel: models.ChainFor("review", "")[0],
|
||||
ExecutorFn: buildOrch("review"),
|
||||
SessionsDir: cfg.SessionsDir,
|
||||
SkillPrompt: prependProtocols(reviewPrompt),
|
||||
DefaultModel: models.ModelFor("review", ""),
|
||||
CompleteFunc: litellm.Complete,
|
||||
SessionsDir: cfg.SessionsDir,
|
||||
IngestBaseURL: cfg.IngestBaseURL,
|
||||
}))
|
||||
reg.Register(skilldebug.New(skilldebug.Config{
|
||||
SkillPrompt: string(debugPrompt),
|
||||
DefaultModel: models.ChainFor("debug", "")[0],
|
||||
ExecutorFn: buildOrch("debug"),
|
||||
SessionsDir: cfg.SessionsDir,
|
||||
SkillPrompt: prependProtocols(debugPrompt),
|
||||
DefaultModel: models.ModelFor("debug", ""),
|
||||
CompleteFunc: litellm.Complete,
|
||||
SessionsDir: cfg.SessionsDir,
|
||||
IngestBaseURL: cfg.IngestBaseURL,
|
||||
}))
|
||||
reg.Register(spec.New(spec.Config{
|
||||
SkillPrompt: string(specPrompt),
|
||||
DefaultModel: models.ChainFor("spec", "")[0],
|
||||
ExecutorFn: buildOrch("spec"),
|
||||
SessionsDir: cfg.SessionsDir,
|
||||
SkillPrompt: prependProtocols(specPrompt),
|
||||
DefaultModel: models.ModelFor("spec", ""),
|
||||
CompleteFunc: litellm.Complete,
|
||||
SessionsDir: cfg.SessionsDir,
|
||||
IngestBaseURL: cfg.IngestBaseURL,
|
||||
}))
|
||||
reg.Register(trainer.New(trainer.Config{
|
||||
ReaderPrompt: string(trainerReaderPrompt),
|
||||
WriterPrompt: string(trainerWriterPrompt),
|
||||
DefaultModel: models.ChainFor("trainer", "")[0],
|
||||
ExecutorFn: buildOrch("trainer"),
|
||||
ReaderPrompt: prependProtocols(trainerReaderPrompt),
|
||||
WriterPrompt: prependProtocols(trainerWriterPrompt),
|
||||
DefaultModel: models.ModelFor("trainer", ""),
|
||||
CompleteFunc: litellm.Complete,
|
||||
SessionsDir: cfg.SessionsDir,
|
||||
BrainDir: cfg.BrainDir,
|
||||
}))
|
||||
@@ -164,7 +153,7 @@ func main() {
|
||||
mux.Handle("/mcp", srv)
|
||||
|
||||
addr := ":" + cfg.Port
|
||||
logger.Info("supervisor starting", "addr", addr)
|
||||
logger.Info("supervisor starting", "addr", addr, "version", "v0.5.0")
|
||||
if err := http.ListenAndServe(addr, mux); err != nil {
|
||||
logger.Error("server stopped", "err", err)
|
||||
os.Exit(1)
|
||||
|
||||
@@ -1,41 +1,26 @@
|
||||
# Model routing chains — three-layer priority:
|
||||
# 1. model param in MCP tool call (caller override — collapses to single entry, no escalation)
|
||||
# 2. per-skill chain here
|
||||
# 3. default_chain fallback
|
||||
|
||||
verifier: claude-sonnet-4-6 # fixed verifier for all local tiers
|
||||
|
||||
llama_swap_url: http://koala:8080 # for warm-state probing
|
||||
# Model selection — first entry per skill is used.
|
||||
# Override per-call by passing model in the MCP tool args.
|
||||
# Model names come from LiteLLM /v1/models (host/name format).
|
||||
|
||||
default_chain:
|
||||
- ollama/qwen3-coder-30b-tuned
|
||||
- claude-sonnet-4-6
|
||||
- iguana/qwen3-coder-next
|
||||
|
||||
skills:
|
||||
tdd:
|
||||
chain:
|
||||
- ollama/qwen3-coder-30b-tuned
|
||||
- claude-sonnet-4-6
|
||||
- koala/qwen3-coder-30b
|
||||
review:
|
||||
chain:
|
||||
- ollama/devstral-tuned
|
||||
- ollama/gemma4
|
||||
- claude-sonnet-4-6
|
||||
- iguana/devstral
|
||||
debug:
|
||||
chain:
|
||||
- ollama/deepseek-r1-tuned
|
||||
- claude-sonnet-4-6
|
||||
- iguana/deepseek-r1-14b
|
||||
spec:
|
||||
chain:
|
||||
- ollama/phi4
|
||||
- ollama/gemma4
|
||||
- claude-sonnet-4-6
|
||||
- claude-opus-4-6
|
||||
- koala/phi4-14b
|
||||
retrospective:
|
||||
chain:
|
||||
- ollama/qwen3-coder-30b-tuned
|
||||
- claude-sonnet-4-6
|
||||
- iguana/qwen3-coder-next
|
||||
trainer:
|
||||
chain:
|
||||
- ollama/qwen3-coder-30b-tuned
|
||||
- claude-sonnet-4-6
|
||||
- iguana/qwen3-coder-next
|
||||
|
||||
@@ -1,27 +1,31 @@
|
||||
# The Hyperguild Way
|
||||
# Hyperguild Skill Protocols
|
||||
|
||||
These protocols are injected into every worker invocation. They define how you behave as a member of the hyperguild.
|
||||
**IMPORTANT: DO NOT OUTPUT JSON. DO NOT USE JSON CODE BLOCKS.**
|
||||
Your response must be plain markdown text. No `{"status":...}`, no ` ```json `, nothing.
|
||||
If you output JSON you will be ignored. Respond in prose and markdown only.
|
||||
|
||||
## Output contract
|
||||
---
|
||||
|
||||
Every response is raw JSON matching the response schema. No preamble, no prose, no markdown. Malformed output is treated as a failed invocation.
|
||||
## Role
|
||||
|
||||
## Quality gate
|
||||
You are a consultant. You analyse, suggest, and explain.
|
||||
Claude Code has the tools to read files, run commands, and write code.
|
||||
You provide the thinking; Claude Code provides the action.
|
||||
|
||||
`verified: true` only when a subprocess exit code confirms the outcome. Never self-assess. "I think the tests pass" is not verified.
|
||||
## Output
|
||||
|
||||
## Escalation
|
||||
Write in clear markdown. Lead with the key finding. Use headers and bullet lists
|
||||
where they help. Be concise — Claude Code reads your full response.
|
||||
|
||||
If stuck after 3 attempts, return `status: error` with a clear `message` explaining why. Do not retry silently. Do not fabricate a passing result.
|
||||
Do not make up file contents, test results, or command output you have not seen.
|
||||
If you lack context to give a useful answer, say so and state what you need.
|
||||
|
||||
## Working offline
|
||||
## Context blocks
|
||||
|
||||
If brain context is absent from your prompt, proceed using your discipline file only. Note the gap in your `message` field: "no brain context available".
|
||||
You may receive one or both of these blocks before your task:
|
||||
|
||||
## Handoff format
|
||||
**`## Relevant knowledge`** — patterns and decisions from past sessions. Let them
|
||||
inform your approach. Do not contradict them without reason.
|
||||
|
||||
Structure your output so the next worker in a chain can consume it without transformation. Use the standard result schema. Do not add extra fields.
|
||||
|
||||
## Session logging
|
||||
|
||||
The Go skill handler records your invocation in the session log automatically. You do not need to do this yourself.
|
||||
**`## Session history`** — what has already happened in this session. Build on it,
|
||||
do not repeat it.
|
||||
|
||||
@@ -1,40 +1,33 @@
|
||||
# Retrospective Worker Discipline
|
||||
# Retrospective Discipline
|
||||
|
||||
You are the retrospective worker. Your job is to review a completed coding session and identify knowledge worth preserving in the hyperguild brain.
|
||||
You review a completed coding session and identify knowledge worth preserving.
|
||||
|
||||
## What you receive
|
||||
|
||||
- A session log in JSON format listing every skill invocation: what was attempted, what failed, what passed, how long it took.
|
||||
|
||||
## What you produce
|
||||
|
||||
For each significant learning, call brain_write with a structured markdown note. Then return a JSON result summarising what you wrote.
|
||||
A session log in JSON format listing every skill invocation: what was attempted,
|
||||
what failed, what passed, how long it took.
|
||||
|
||||
## What is worth preserving
|
||||
|
||||
- Patterns that worked and should be repeated
|
||||
- Failures that revealed something non-obvious about the codebase or the discipline
|
||||
- Failures that revealed something non-obvious about the codebase or the approach
|
||||
- Decisions made during the session (architectural, structural, tooling)
|
||||
- Anything that contradicts or extends what the brain already knows
|
||||
- Anything that contradicts or extends established patterns
|
||||
|
||||
## What is NOT worth preserving
|
||||
|
||||
- Routine TDD cycles with no surprises
|
||||
- Routine cycles with no surprises
|
||||
- Single-attempt passes with no interesting context
|
||||
- Mechanical operations (file moves, renames, formatting)
|
||||
|
||||
## Output format
|
||||
|
||||
Return JSON matching the standard result schema:
|
||||
Respond in markdown. For each learning worth preserving:
|
||||
|
||||
```json
|
||||
{
|
||||
"status": "pass",
|
||||
"phase": "retrospective",
|
||||
"skill": "retrospective",
|
||||
"verified": true,
|
||||
"message": "wrote N entries to brain/raw/"
|
||||
}
|
||||
```
|
||||
**Learning:** One sentence describing what was learned.
|
||||
**Context:** Why this session surfaced it — what made it non-obvious.
|
||||
**Recommendation:** What should be done differently or repeated going forward.
|
||||
|
||||
`verified` is true when you successfully called brain_write at least once and received a confirmation. If the session had nothing worth writing, return `verified: true` with `message: "no novel learnings in this session"`.
|
||||
End with a summary: "N learnings worth writing to brain" or "No novel learnings in this session."
|
||||
|
||||
The caller will decide which learnings to write to the brain using brain_write.
|
||||
|
||||
@@ -2,29 +2,24 @@
|
||||
|
||||
You are a disciplined code reviewer. Read files carefully before commenting.
|
||||
|
||||
## Iron laws
|
||||
1. Never approve security vulnerabilities: command injection, SQL injection, credential exposure, path traversal, unchecked input at system boundaries
|
||||
2. Never approve silently swallowed errors — `err != nil` without wrapping or handling is always wrong
|
||||
3. Never approve missing validation at system boundaries (user input, external APIs, file reads)
|
||||
## Iron laws — any violation is a blocking issue
|
||||
1. No security vulnerabilities: command injection, SQL injection, credential exposure, path traversal, unchecked input at system boundaries
|
||||
2. No silently swallowed errors — `err != nil` without wrapping or handling is always wrong
|
||||
3. No missing validation at system boundaries (user input, external APIs, file reads)
|
||||
|
||||
## Output contract
|
||||
Return JSON result with:
|
||||
- `status`: "pass" if no blocking issues; "fail" if any iron law is violated
|
||||
- `phase`: "review"
|
||||
- `skill`: "review"
|
||||
- `file_path`: first file reviewed
|
||||
- `runner_output`: full review formatted as:
|
||||
```
|
||||
CRITICAL: <issue> at <file>:<line>
|
||||
WARNING: <issue> at <file>:<line>
|
||||
SUGGESTION: <issue> at <file>:<line>
|
||||
```
|
||||
- `verified`: true if you read all specified files; false if any were missing or unreadable
|
||||
- `message`: "N critical, M warnings, K suggestions" or "clean: <which iron law checks passed and why>"
|
||||
## Output format
|
||||
|
||||
Respond in markdown. Group findings by severity:
|
||||
|
||||
**CRITICAL:** Issues that violate an iron law or will cause data loss / security breach.
|
||||
**WARNING:** Issues that will likely cause bugs or maintenance problems.
|
||||
**SUGGESTION:** Style, clarity, or optional improvements.
|
||||
|
||||
For each finding include the file and line number. If nothing is wrong, explain specifically which iron law checks you ran and why they passed — never rubber-stamp.
|
||||
|
||||
## Rules
|
||||
1. Read every file listed before writing feedback
|
||||
2. Check iron laws first — any violation is CRITICAL and sets status to "fail"
|
||||
2. Check iron laws first — if any are violated, flag them before anything else
|
||||
3. Then check: correctness, test coverage for new code, Go style conventions
|
||||
4. Never rubber-stamp — if nothing is wrong, explain specifically which iron law checks you ran and why they passed
|
||||
5. Line references are required for every finding — "roughly around the middle" is not acceptable
|
||||
4. Line references required for every finding
|
||||
5. End with a one-line summary: "N critical, M warnings, K suggestions" or "Clean — no issues found"
|
||||
|
||||
@@ -7,40 +7,31 @@ You write structured implementation specs. Nothing is left ambiguous.
|
||||
2. Always include an explicit "Out of scope" section — if you don't draw the boundary, the developer will guess wrong
|
||||
3. Every technical decision in the approach must have a rationale
|
||||
|
||||
## Output contract
|
||||
Return JSON result with:
|
||||
- `status`: "pass" (spec written) or "error" (requirements too ambiguous to spec without more input)
|
||||
- `phase`: "spec"
|
||||
- `skill`: "spec"
|
||||
- `file_path`: the output_path where the spec was written (absolute path)
|
||||
- `runner_output`: ""
|
||||
- `verified`: true if the file was written successfully
|
||||
- `message`: "spec written: <one-line summary of what was specced>"
|
||||
## Output format
|
||||
|
||||
## Spec structure
|
||||
Write the spec as markdown to the output_path:
|
||||
Write the spec as markdown using this structure:
|
||||
|
||||
```markdown
|
||||
```
|
||||
# [Feature] Spec
|
||||
|
||||
## Problem statement
|
||||
[What problem does this solve? For whom? Why now?]
|
||||
What problem does this solve? For whom? Why now?
|
||||
|
||||
## Success criteria
|
||||
- [ ] [Criterion 1 — measurable and verifiable]
|
||||
- [ ] [Criterion 2 — measurable and verifiable]
|
||||
- [ ] Criterion 1 — measurable and verifiable
|
||||
- [ ] Criterion 2 — measurable and verifiable
|
||||
|
||||
## Constraints
|
||||
[Non-negotiable requirements the solution must satisfy]
|
||||
Non-negotiable requirements the solution must satisfy.
|
||||
|
||||
## Out of scope
|
||||
[What we are explicitly NOT doing in this iteration]
|
||||
What we are explicitly NOT doing in this iteration.
|
||||
|
||||
## Technical approach
|
||||
[Architecture decisions, key components, rationale for each choice]
|
||||
Architecture decisions, key components, rationale for each choice.
|
||||
|
||||
## Risks
|
||||
[What could go wrong, and how we'd mitigate it]
|
||||
What could go wrong, and how we'd mitigate it.
|
||||
```
|
||||
|
||||
If the requirements are too vague to produce measurable success criteria, return status "error" with a message listing the specific questions that need answers.
|
||||
If requirements are too vague to produce measurable success criteria, say so and list the specific questions that need answers before you can write the spec.
|
||||
|
||||
@@ -1,26 +1,35 @@
|
||||
# TDD Skill
|
||||
# TDD Discipline
|
||||
|
||||
## Iron Law
|
||||
|
||||
NO PRODUCTION CODE WITHOUT A FAILING TEST FIRST.
|
||||
|
||||
## Red phase
|
||||
## Red phase — write a failing test
|
||||
|
||||
- Write exactly one test. One behavior. Name must describe the behavior clearly.
|
||||
- Run the test suite. Confirm the test FAILS.
|
||||
- If the test passes immediately: it tests existing behavior or is vacuous.
|
||||
Return status "fail" with message explaining why the test is wrong.
|
||||
- The test must fail for the right reason — not a compile error, but an assertion failure.
|
||||
- Do not write any implementation code in this phase.
|
||||
|
||||
## Green phase
|
||||
Respond with:
|
||||
- The test code to write (file path + content)
|
||||
- The exact failure you expect to see when running it
|
||||
- Why that failure confirms the test is meaningful
|
||||
|
||||
## Green phase — make the test pass
|
||||
|
||||
- Write the minimal code to make the failing test pass. Nothing more.
|
||||
- YAGNI: no extra parameters, no future-proofing, no clever abstractions.
|
||||
- Run the test suite. Confirm it PASSES.
|
||||
- If tests fail: fix the implementation, not the test. Max 3 attempts.
|
||||
|
||||
## Refactor phase
|
||||
Respond with:
|
||||
- The implementation code to write (file path + content)
|
||||
- Confirmation of which test it targets and how it satisfies the assertion
|
||||
|
||||
## Refactor phase — improve without changing behavior
|
||||
|
||||
- Improve structure, naming, or clarity only. No new behavior.
|
||||
- Tests must remain green after every change.
|
||||
- If tests break during refactor: revert that change, return status "fail".
|
||||
|
||||
Respond with:
|
||||
- Specific refactoring suggestions with rationale
|
||||
- Which files to touch and what to change
|
||||
- Any risks that could break existing tests
|
||||
|
||||
@@ -1,31 +1,26 @@
|
||||
# Trainer Reader Discipline
|
||||
|
||||
You scan session logs and identify candidate learning moments worth converting to training data.
|
||||
You scan session logs and identify candidate learning moments worth preserving in the brain.
|
||||
|
||||
## What to look for
|
||||
- **SFT candidates**: the worker did exactly the right thing — a clean pattern worth reinforcing
|
||||
- **DPO candidates**: the worker first produced a wrong or suboptimal response, then corrected — you have both rejected and chosen
|
||||
|
||||
- **Patterns that worked**: the approach was clean and correct — worth reinforcing
|
||||
- **Corrections**: something was first done wrong, then corrected — both sides are valuable
|
||||
|
||||
## Scoring (1–5)
|
||||
|
||||
- 5: novel pattern, clearly correct, generalises across projects
|
||||
- 4: good pattern, correct, somewhat project-specific but still useful
|
||||
- 3: correct but obvious — include only if especially clean
|
||||
- 2 or below: skip — too ambiguous or too context-specific
|
||||
- 2 or below: skip
|
||||
|
||||
## Output contract
|
||||
Return JSON result with:
|
||||
- `status`: "pass" or "error"
|
||||
- `phase`: "trainer"
|
||||
- `skill`: "trainer"
|
||||
- `file_path`: ""
|
||||
- `runner_output`: JSON array of candidates (valid JSON, not markdown):
|
||||
[{"type":"sft","moment":"<what happened>","prompt":"<what was asked>","completion":"<what was done right>","score":4},
|
||||
{"type":"dpo","moment":"<what happened>","prompt":"<what was asked>","chosen":"<correct>","rejected":"<incorrect>","score":3}]
|
||||
- `verified`: true
|
||||
- `message`: "N sft candidates, M dpo candidates found"
|
||||
## Output format
|
||||
|
||||
## Rules
|
||||
1. Read all session entries in the task prompt
|
||||
2. Score each entry — only include entries scoring >= 3
|
||||
3. Prompt/completion fields must be phrased to generalise: no project-specific paths or names
|
||||
4. If no candidates score >= 3, return an empty array `[]` — never force low-quality candidates
|
||||
Respond in markdown. List each candidate:
|
||||
|
||||
**Candidate N (score: X/5, type: pattern|correction)**
|
||||
- **What happened:** Brief description of the learning moment
|
||||
- **Why it's valuable:** What makes this worth preserving
|
||||
- **Key insight:** The distilled lesson in one sentence
|
||||
|
||||
End with: "N candidates found (M scoring ≥ 3)" — the writer will use these to produce knowledge entries.
|
||||
|
||||
@@ -1,35 +1,31 @@
|
||||
# Trainer Writer Discipline
|
||||
|
||||
You receive candidate learning moments from the reader and write clean SFT/DPO training pairs.
|
||||
You receive candidate learning moments from the reader and write knowledge entries for the brain.
|
||||
|
||||
## Quality gate (apply before writing)
|
||||
- SFT: prompt must be phrased so it could come from any project, not just this one
|
||||
- DPO: chosen and rejected must be clearly distinguishable — skip if a reader can't tell which is better
|
||||
- Never include project-specific paths, variable names, or identifiers in any pair
|
||||
## Quality gate (apply before writing each entry)
|
||||
|
||||
## Output contract
|
||||
Return JSON result with:
|
||||
- `status`: "pass" (pairs written or skipped due to quality) or "error" (candidates JSON was malformed)
|
||||
- `phase`: "trainer"
|
||||
- `skill`: "trainer"
|
||||
- `file_path`: path of the last file written (empty if nothing passed quality gate)
|
||||
- `runner_output`: "N SFT pairs written to brain/training-data/sft/, M DPO pairs to brain/training-data/dpo/" or "0 pairs passed quality gate"
|
||||
- `verified`: true if files were written; false if nothing passed
|
||||
- `message`: "N sft + M dpo pairs for session <id>" or "no pairs passed quality gate"
|
||||
- The lesson must be phrased so it could apply to any project, not just this one
|
||||
- No project-specific paths, variable names, or identifiers
|
||||
- The insight must be stated clearly enough that someone reading it cold would understand it
|
||||
|
||||
## File format
|
||||
JSONL — one JSON object per line.
|
||||
## Output format
|
||||
|
||||
SFT: `{"prompt": "...", "completion": "..."}`
|
||||
DPO: `{"prompt": "...", "chosen": "...", "rejected": "..."}`
|
||||
For each candidate that passes the quality gate, write a knowledge entry in this format:
|
||||
|
||||
Write SFT to: `<brain_dir>/training-data/sft/<session_id>.jsonl`
|
||||
Write DPO to: `<brain_dir>/training-data/dpo/<session_id>.jsonl`
|
||||
```
|
||||
# [Topic]
|
||||
|
||||
Append to existing files if they exist (don't overwrite).
|
||||
## Lesson
|
||||
[The key insight in 1-3 sentences]
|
||||
|
||||
## Rules
|
||||
1. Parse the `reader_candidates` JSON from the task prompt
|
||||
2. For each candidate: apply quality gate
|
||||
3. Write passing SFT candidates to sft JSONL, DPO candidates to dpo JSONL
|
||||
4. If nothing passes, return status "pass" with verified: false and message "no pairs passed quality gate"
|
||||
## When it applies
|
||||
[Conditions under which this pattern is relevant]
|
||||
|
||||
## Example
|
||||
[A brief, generic example that illustrates the lesson]
|
||||
```
|
||||
|
||||
After presenting all entries, end with a summary:
|
||||
"N entries ready for brain_write" or "0 entries passed quality gate — [reason]"
|
||||
|
||||
The caller will write passing entries to the brain using brain_write.
|
||||
|
||||
241
docs/multi-model-routing.md
Normal file
241
docs/multi-model-routing.md
Normal file
@@ -0,0 +1,241 @@
|
||||
# Multi-Model Routing for supervisor
|
||||
|
||||
Reference document for implementing multi-model access within the supervisor project.
|
||||
Researched April 2026. Constraints: Claude Max subscription (ToS must be respected).
|
||||
|
||||
---
|
||||
|
||||
## Goal
|
||||
|
||||
Route tasks to specialized, cheaper, or local models during agent and skill flows — without
|
||||
violating Anthropic's terms or introducing unnecessary infrastructure risk.
|
||||
|
||||
---
|
||||
|
||||
## Hard Constraints
|
||||
|
||||
- Claude Max subscription is in use. Anthropic's April 2026 terms **prohibit using the
|
||||
subscription with third-party harnesses that spoof the Anthropic API surface**.
|
||||
- `ANTHROPIC_BASE_URL` → LiteLLM workaround is explicitly out of scope.
|
||||
- Claude must remain the reasoning engine. Other models are tools, not replacements.
|
||||
|
||||
---
|
||||
|
||||
## Infrastructure Available
|
||||
|
||||
| Machine | Role | Relevant services |
|
||||
|---------|------|-------------------|
|
||||
| koala | GPU inference | llama-swap, Ollama, Qdrant, LiteLLM proxy |
|
||||
| iguana | Services, builds | k3s, general services |
|
||||
| flamingo | Daily driver | Claude Code runs here |
|
||||
|
||||
LiteLLM proxy on koala exposes 100+ models (local + cloud) through a unified API.
|
||||
All machines connected via Tailscale.
|
||||
|
||||
---
|
||||
|
||||
## Approved Patterns
|
||||
|
||||
### Pattern 1 — Native Claude model tiering (zero build)
|
||||
|
||||
Claude Code subagents support per-agent model selection via frontmatter.
|
||||
Use this for cost routing within the Claude model family.
|
||||
|
||||
```yaml
|
||||
# ~/.claude/agents/explorer.md
|
||||
---
|
||||
name: explorer
|
||||
description: File reading, code search, codebase mapping — use for all exploration tasks
|
||||
model: haiku
|
||||
---
|
||||
```
|
||||
|
||||
- `haiku` for exploration, summarization, classification
|
||||
- `sonnet` (default) for main reasoning and implementation
|
||||
- `opus` for deep analysis, architecture decisions
|
||||
|
||||
**When to use**: Always. Add `model: haiku` to any subagent that does read-heavy or
|
||||
classification work. Cheapest and fastest path to cost control.
|
||||
|
||||
---
|
||||
|
||||
### Pattern 2 — MCP tools wrapping local models (primary build target)
|
||||
|
||||
Expose local models on koala as named MCP tools. Claude remains the orchestrator and
|
||||
reasoning engine — it calls local models as tools the same way it calls any other tool.
|
||||
|
||||
This is the intended MCP use case and carries zero ToS risk.
|
||||
|
||||
**Semantic contract**: Claude decides *when* to delegate based on the tool description.
|
||||
Write descriptions that tell Claude what the model is good for.
|
||||
|
||||
#### MCP server implementation
|
||||
|
||||
Small Python stdio server, launched by Claude Code on flamingo (it reaches koala's LiteLLM proxy over Tailscale) and registered in Claude Code settings.
|
||||
|
||||
```python
|
||||
# supervisor/scripts/mcp_local_models.py
|
||||
import mcp
|
||||
import requests
|
||||
|
||||
server = mcp.Server("local-models")
|
||||
|
||||
LITELLM_BASE = "http://koala:4000"
|
||||
OLLAMA_BASE = "http://koala:11434"
|
||||
|
||||
def _litellm_chat(model: str, prompt: str) -> str:
|
||||
r = requests.post(f"{LITELLM_BASE}/v1/chat/completions", json={
|
||||
"model": model,
|
||||
"messages": [{"role": "user", "content": prompt}],
|
||||
"max_tokens": 2048,
|
||||
})
|
||||
r.raise_for_status()
|
||||
return r.json()["choices"][0]["message"]["content"]
|
||||
|
||||
|
||||
@server.tool()
|
||||
def ask_local_llama(prompt: str) -> str:
|
||||
"""Ask the local Llama model on koala.
|
||||
Use for: bulk summarization, first-pass analysis, classification, simple Q&A,
|
||||
anything that does not require deep reasoning or up-to-date knowledge.
|
||||
Faster and cheaper than cloud models for routine subtasks."""
|
||||
return _litellm_chat("llama3-local", prompt)
|
||||
|
||||
|
||||
@server.tool()
|
||||
def ask_coding_model(code: str, question: str) -> str:
|
||||
"""Ask a code-specialized local model.
|
||||
Use for: syntax checking, boilerplate generation, code formatting questions,
|
||||
simple refactors where pattern-matching is sufficient."""
|
||||
return _litellm_chat("codellama-local", f"Code:\n{code}\n\nQuestion: {question}")
|
||||
|
||||
|
||||
@server.tool()
|
||||
def list_available_local_models() -> list[str]:
|
||||
"""List all models currently available on the local LiteLLM proxy."""
|
||||
r = requests.get(f"{LITELLM_BASE}/v1/models")
|
||||
r.raise_for_status()
|
||||
return [m["id"] for m in r.json()["data"]]
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
mcp.run_stdio_server(server)
|
||||
```
|
||||
|
||||
#### Register in Claude Code
|
||||
|
||||
Register the server with `claude mcp add`, or add it to the project-level `.mcp.json` (user-scoped servers are stored in `~/.claude.json`):
|
||||
|
||||
```json
|
||||
{
|
||||
"mcpServers": {
|
||||
"local-models": {
|
||||
"command": "python3",
|
||||
"args": ["/path/to/supervisor/scripts/mcp_local_models.py"]
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### LiteLLM config additions needed on koala
|
||||
|
||||
```yaml
|
||||
# litellm config.yaml — add model entries for local models
|
||||
model_list:
|
||||
- model_name: llama3-local
|
||||
litellm_params:
|
||||
model: ollama/llama3.2
|
||||
api_base: http://localhost:11434
|
||||
|
||||
- model_name: codellama-local
|
||||
litellm_params:
|
||||
model: ollama/codellama
|
||||
api_base: http://localhost:11434
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Pattern 3 — External orchestration scripts (for pipeline workflows)
|
||||
|
||||
For multi-model pipelines that don't need to live inside a Claude Code session.
|
||||
These scripts use their own API key (separate from Max subscription — API billing),
|
||||
so they can call Claude API + LiteLLM freely.
|
||||
|
||||
Claude Code invokes them via the Bash tool.
|
||||
|
||||
```
|
||||
Claude Code → [Bash tool] → ./scripts/orchestrate.py → {Claude API, LiteLLM, local models}
|
||||
```
|
||||
|
||||
```python
|
||||
# supervisor/scripts/orchestrate.py
|
||||
import anthropic
|
||||
import requests
|
||||
|
||||
claude = anthropic.Anthropic() # reads ANTHROPIC_API_KEY — separate from Max subscription
|
||||
|
||||
def analyze_document(path: str) -> str:
|
||||
with open(path) as f:
|
||||
content = f.read()
|
||||
|
||||
# Step 1: local Llama extracts structure (fast, cheap)
|
||||
structure = requests.post("http://koala:4000/v1/chat/completions", json={
|
||||
"model": "llama3-local",
|
||||
"messages": [{"role": "user", "content": f"Extract key sections from:\n{content}"}],
|
||||
}).json()["choices"][0]["message"]["content"]
|
||||
|
||||
# Step 2: Claude synthesizes and reasons over it
|
||||
synthesis = claude.messages.create(
|
||||
model="claude-sonnet-4-6",
|
||||
max_tokens=2048,
|
||||
messages=[{"role": "user", "content": f"Synthesize these findings:\n{structure}"}]
|
||||
)
|
||||
return synthesis.content[0].text
|
||||
```
|
||||
|
||||
**When to use**: Batch processing, automated pipelines, workflows triggered by cron or
|
||||
external events. Not for interactive Claude Code sessions.
|
||||
|
||||
---
|
||||
|
||||
## What to Skip
|
||||
|
||||
| Approach | Why skip |
|
||||
|----------|----------|
|
||||
| `ANTHROPIC_BASE_URL` → LiteLLM | ToS violation with Max subscription (April 2026 terms) |
|
||||
| Third-party harnesses (OpenClaw etc.) | Explicitly banned for subscription users |
|
||||
| A2A in Claude Code | Not implemented by Anthropic yet — revisit late 2026 |
|
||||
| OpenAI agent handoffs | Loses execution context, not worth the complexity |
|
||||
|
||||
---
|
||||
|
||||
## Protocol Landscape (for awareness, not immediate action)
|
||||
|
||||
- **MCP** — production, 97M monthly downloads, your primary tool-access protocol. LiteLLM
|
||||
natively supports it as both MCP gateway and MCP client as of v1.60+.
|
||||
- **A2A v1.0** — Google/Linux Foundation, 150+ orgs in production, but Anthropic has not
|
||||
shipped it in Claude Code. The intent is agent-to-agent peer delegation (vs MCP's
|
||||
agent-to-tool). Worth watching for H2 2026.
|
||||
- **AGNTCY** — Cisco/Linux Foundation, discovery and identity layer beneath MCP+A2A.
|
||||
Potentially relevant for multi-machine routing across koala/iguana/flamingo once mature.
|
||||
|
||||
---
|
||||
|
||||
## Build Priority
|
||||
|
||||
| Step | Effort | Value | When |
|
||||
|------|--------|-------|------|
|
||||
| Add `model: haiku` to explorer subagents | 10 min | Immediate cost saving | Now |
|
||||
| Write MCP server for local models | 2–3h | Local model access in sessions | Soon |
|
||||
| Register MCP server in Claude Code settings | 15 min | Activates pattern 2 | With above |
|
||||
| Write orchestration script template | 1–2h | Pipeline workflows | When needed |
|
||||
|
||||
---
|
||||
|
||||
## References
|
||||
|
||||
- LiteLLM MCP docs: https://docs.litellm.ai/docs/mcp
|
||||
- Community MCP wrapper for LiteLLM: https://github.com/itsDarianNgo/mcp-server-litellm
|
||||
- Ollama MCP server: https://github.com/rawveg/ollama-mcp
|
||||
- A2A protocol status: https://www.linuxfoundation.org/press/a2a-protocol-surpasses-150-organizations-lands-in-major-cloud-platforms-and-sees-enterprise-production-use-in-first-year
|
||||
- AGNTCY: https://github.com/agntcy
|
||||
2138
docs/superpowers/plans/2026-04-17-hyperguild-phase1.md
Normal file
2138
docs/superpowers/plans/2026-04-17-hyperguild-phase1.md
Normal file
File diff suppressed because it is too large
Load Diff
1871
docs/superpowers/plans/2026-04-19-hyperguild-phase2.md
Normal file
1871
docs/superpowers/plans/2026-04-19-hyperguild-phase2.md
Normal file
File diff suppressed because it is too large
Load Diff
923
docs/superpowers/plans/2026-04-20-cd-pipeline.md
Normal file
923
docs/superpowers/plans/2026-04-20-cd-pipeline.md
Normal file
@@ -0,0 +1,923 @@
|
||||
# CD Pipeline Implementation Plan
|
||||
|
||||
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
|
||||
|
||||
**Goal:** Build a GitOps CD pipeline that automatically builds a container image on `main` push and deploys it to k3s on koala via Flux.
|
||||
|
||||
**Architecture:** BuildKit runs as a systemd service on koala (same host as the Gitea runner); CD pushes images to the Gitea registry and commits image tag updates to the infra repo; Flux reconciles within 60s. App secrets (including ANTHROPIC_API_KEY) are SOPS-encrypted in the infra repo and decrypted by Flux at apply time.
|
||||
|
||||
**Tech Stack:** Go 1.26, Node.js 22 (for claude CLI), BuildKit (buildctl), Gitea Actions, Flux (kustomize-controller), SOPS + age, k3s/containerd
|
||||
|
||||
---
|
||||
|
||||
## Environment context
|
||||
|
||||
This plan spans five environments. Each task header notes which environment it runs in:
|
||||
|
||||
- **[this-repo]** — `/Users/mathias/Documents/local-dev/AI/supervisor` on flamingo
|
||||
- **[koala-ssh]** — `ssh koala` (run commands via `ssh koala "..."`)
|
||||
- **[infra-repo]** — `gitea.d-ma.be/mathias/infra` (clone to a temp dir, work there, push)
|
||||
- **[gitea-ui]** — Gitea web UI at `https://gitea.d-ma.be`
|
||||
- **[kubectl]** — kubectl from flamingo (home LAN)
|
||||
|
||||
---
|
||||
|
||||
## File map
|
||||
|
||||
**This repo (supervisor):**
|
||||
- Create: `Dockerfile`
|
||||
- Create: `.gitea/workflows/cd.yml`
|
||||
|
||||
**koala host:**
|
||||
- Create: `/etc/systemd/system/buildkitd.service` (or user-level equivalent)
|
||||
- Create: `/etc/buildkit/buildkitd.toml` (registry config, referenced by the systemd unit's `--config` flag)
|
||||
|
||||
**Infra repo (`gitea.d-ma.be/mathias/infra`):**
|
||||
- Create: `apps/supervisor/namespace.yaml`
|
||||
- Create: `apps/supervisor/deployment.yaml`
|
||||
- Create: `apps/supervisor/service.yaml`
|
||||
- Create: `apps/supervisor/secrets.enc.yaml` (SOPS-encrypted)
|
||||
- Create: `apps/supervisor/kustomization.yaml`
|
||||
- Create: `apps/imagepullsecret/secret.enc.yaml` (SOPS-encrypted)
|
||||
- Create: `apps/imagepullsecret/kustomization.yaml`
|
||||
- Modify: `clusters/koala/kustomization.yaml` (add supervisor + imagepullsecret)
|
||||
- Modify: `flux-system/kustomization.yaml` or relevant Flux Kustomization CRD (add SOPS decryption)
|
||||
|
||||
---
|
||||
|
||||
## Task 1: Dockerfile [this-repo]
|
||||
|
||||
The supervisor binary depends on the `claude` CLI as a subprocess. The image uses a multi-stage build: Go builder stage compiles the binary; the runtime stage is Node.js (for `npm install -g @anthropic-ai/claude-code`). Config files are baked in. The `brain/` directory is a volume mount.
|
||||
|
||||
**Files:**
|
||||
- Create: `Dockerfile`
|
||||
|
||||
- [ ] **Step 1: Verify no Dockerfile exists**
|
||||
|
||||
```bash
|
||||
ls Dockerfile 2>/dev/null || echo "confirmed: no Dockerfile"
|
||||
```
|
||||
|
||||
Expected: `confirmed: no Dockerfile`
|
||||
|
||||
- [ ] **Step 2: Create the Dockerfile**
|
||||
|
||||
```dockerfile
|
||||
# syntax=docker/dockerfile:1
|
||||
|
||||
# ── Build stage ───────────────────────────────────────────────────────────────
|
||||
FROM golang:1.26-bookworm AS builder
|
||||
|
||||
ARG VERSION=dev
|
||||
WORKDIR /src
|
||||
|
||||
COPY go.mod go.sum ./
|
||||
RUN go mod download
|
||||
|
||||
COPY . .
|
||||
RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 \
|
||||
go build -trimpath -ldflags="-s -w -X main.version=${VERSION}" \
|
||||
-o /out/supervisor ./cmd/supervisor
|
||||
|
||||
# ── Runtime stage ─────────────────────────────────────────────────────────────
|
||||
# Node.js 22 slim — needed for claude CLI subprocess
|
||||
FROM node:22-slim
|
||||
|
||||
# Install claude CLI (provides the `claude` binary the supervisor shells out to)
|
||||
RUN npm install -g @anthropic-ai/claude-code \
|
||||
&& claude --version \
|
||||
&& echo "claude CLI installed"
|
||||
|
||||
# Copy supervisor binary
|
||||
COPY --from=builder /out/supervisor /usr/local/bin/supervisor
|
||||
|
||||
# Bake in config (models.yaml + skill discipline files)
|
||||
COPY config/ /app/config/
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# brain/ is writable state — mount a PersistentVolume here
|
||||
VOLUME /app/brain
|
||||
|
||||
ENV SUPERVISOR_CONFIG_DIR=/app/config/supervisor
|
||||
ENV SUPERVISOR_MODELS_FILE=/app/config/models.yaml
|
||||
ENV SUPERVISOR_BRAIN_DIR=/app/brain
|
||||
ENV SUPERVISOR_SESSIONS_DIR=/app/brain/sessions
|
||||
ENV SUPERVISOR_PORT=3200
|
||||
|
||||
EXPOSE 3200
|
||||
|
||||
ENTRYPOINT ["/usr/local/bin/supervisor"]
|
||||
```
|
||||
|
||||
- [ ] **Step 3: Build locally to verify it compiles (no push)**
|
||||
|
||||
```bash
|
||||
# buildctl must be available locally, OR use docker if available on flamingo
|
||||
docker build --target builder -t supervisor-build-test . && echo "build stage OK"
|
||||
# If no docker on flamingo, skip this step and verify at Task 3 on koala instead
|
||||
```
|
||||
|
||||
- [ ] **Step 4: Commit**
|
||||
|
||||
```bash
|
||||
git add Dockerfile
|
||||
git commit -m "feat: add multi-stage Dockerfile with claude CLI runtime"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Task 2: BuildKit systemd service on koala [koala-ssh]
|
||||
|
||||
Install `buildkitd` as a root-level systemd service on koala. The Gitea runner process runs as root (confirmed by PID/cgroup), so the root socket at `/run/buildkit/buildkitd.sock` is accessible to it.
|
||||
|
||||
**Files:**
|
||||
- Create: `/etc/systemd/system/buildkitd.service` on koala
|
||||
- Create: `/etc/buildkit/buildkitd.toml` on koala (registry auth)
|
||||
|
||||
- [ ] **Step 1: Check if buildkitd is already installed**
|
||||
|
||||
```bash
|
||||
ssh koala "buildkitd --version 2>/dev/null || echo 'not installed'"
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Install buildkitd on koala**
|
||||
|
||||
Download the pinned buildkit release binary (amd64 build — koala is x86_64):
|
||||
|
||||
```bash
|
||||
ssh koala "
|
||||
BUILDKIT_VERSION=v0.21.0
|
||||
curl -sSL https://github.com/moby/buildkit/releases/download/\${BUILDKIT_VERSION}/buildkit-\${BUILDKIT_VERSION}.linux-amd64.tar.gz \
|
||||
| tar -xz -C /usr/local/
|
||||
buildkitd --version
|
||||
"
|
||||
```
|
||||
|
||||
Expected output includes: `buildkitd github.com/moby/buildkit v0.21.0`
|
||||
|
||||
- [ ] **Step 3: Create buildkitd.toml with Gitea registry auth**
|
||||
|
||||
The `[registry]` block sets transport options for `gitea.d-ma.be` (HTTPS with TLS verification); it carries no credentials. Push credentials come from the Docker-style `/root/.docker/config.json` created in Task 3, which `buildctl` reads and forwards to `buildkitd` at build time:
|
||||
|
||||
```bash
|
||||
ssh koala "
|
||||
mkdir -p /etc/buildkit
|
||||
cat > /etc/buildkit/buildkitd.toml << 'EOF'
|
||||
[worker.containerd]
|
||||
enabled = false
|
||||
|
||||
[worker.oci]
|
||||
enabled = true
|
||||
|
||||
[registry.\"gitea.d-ma.be\"]
|
||||
http = false
|
||||
insecure = false
|
||||
EOF
|
||||
"
|
||||
```
|
||||
|
||||
- [ ] **Step 4: Create systemd unit**
|
||||
|
||||
```bash
|
||||
ssh koala "
|
||||
cat > /etc/systemd/system/buildkitd.service << 'EOF'
|
||||
[Unit]
|
||||
Description=BuildKit daemon
|
||||
After=network.target
|
||||
|
||||
[Service]
|
||||
Type=notify
|
||||
ExecStart=/usr/local/bin/buildkitd --config /etc/buildkit/buildkitd.toml
|
||||
Restart=on-failure
|
||||
LimitNOFILE=1048576
|
||||
LimitNPROC=1048576
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
EOF
|
||||
systemctl daemon-reload
|
||||
systemctl enable buildkitd
|
||||
systemctl start buildkitd
|
||||
"
|
||||
```
|
||||
|
||||
- [ ] **Step 5: Verify the socket exists and is responsive**
|
||||
|
||||
```bash
|
||||
ssh koala "
|
||||
systemctl status buildkitd --no-pager
|
||||
buildctl --addr unix:///run/buildkit/buildkitd.sock debug info
|
||||
"
|
||||
```
|
||||
|
||||
Expected: service `active (running)`, buildctl shows BuildKit version info.
|
||||
|
||||
- [ ] **Step 6: Smoke-test build with trivial Dockerfile**
|
||||
|
||||
```bash
|
||||
ssh koala "
|
||||
echo 'FROM alpine:3.21
|
||||
RUN echo hello' | buildctl --addr unix:///run/buildkit/buildkitd.sock build \
|
||||
--frontend dockerfile.v0 \
|
||||
--local context=/ \
|
||||
--opt filename=Dockerfile \
|
||||
--output type=image,name=localhost/smoke-test:latest
|
||||
echo 'smoke test OK'
|
||||
"
|
||||
```
|
||||
|
||||
Expected: `smoke test OK`
|
||||
|
||||
---
|
||||
|
||||
## Task 3: Gitea registry push auth for buildctl [koala-ssh]
|
||||
|
||||
`buildctl` reads Docker-style credentials from `/root/.docker/config.json`. Create the credentials file so the runner can push to `gitea.d-ma.be`.
|
||||
|
||||
**Prerequisites:** A Gitea user token or password with `write:packages` scope for the `mathias` org. Create one in Gitea → User Settings → Applications → Generate Token (scopes: `write:packages`).
|
||||
|
||||
- [ ] **Step 1: Create Gitea access token**
|
||||
|
||||
In Gitea UI (`https://gitea.d-ma.be`) → top-right avatar → Settings → Applications → Generate New Token.
|
||||
- Token name: `buildkit-push`
|
||||
- Scopes: `write:packages` (container registry write)
|
||||
- Copy the token — it won't be shown again.
|
||||
|
||||
- [ ] **Step 2: Write docker config.json on koala**
|
||||
|
||||
Replace `<TOKEN>` with the token from Step 1:
|
||||
|
||||
```bash
|
||||
ssh koala "
|
||||
mkdir -p /root/.docker
|
||||
TOKEN=<TOKEN>
|
||||
AUTH=\$(echo -n 'mathias:'\${TOKEN} | base64)
|
||||
cat > /root/.docker/config.json << EOF
|
||||
{
|
||||
\"auths\": {
|
||||
\"gitea.d-ma.be\": {
|
||||
\"auth\": \"\${AUTH}\"
|
||||
}
|
||||
}
|
||||
}
|
||||
EOF
|
||||
chmod 600 /root/.docker/config.json
|
||||
echo 'credentials written'
|
||||
"
|
||||
```
|
||||
|
||||
- [ ] **Step 3: Verify push works**
|
||||
|
||||
```bash
|
||||
ssh koala "
|
||||
echo 'FROM alpine:3.21' | buildctl --addr unix:///run/buildkit/buildkitd.sock build \
|
||||
--frontend dockerfile.v0 \
|
||||
--local context=/ \
|
||||
--opt filename=Dockerfile \
|
||||
--output type=image,name=gitea.d-ma.be/mathias/supervisor:push-test,push=true
|
||||
echo 'push OK'
|
||||
"
|
||||
```
|
||||
|
||||
Expected: `push OK`. Verify in Gitea UI: `https://gitea.d-ma.be/mathias/supervisor/packages` should show a `push-test` tag.
|
||||
|
||||
- [ ] **Step 4: Delete the test image tag**
|
||||
|
||||
In Gitea UI → supervisor repo → Packages tab → delete the `push-test` tag.
|
||||
|
||||
---
|
||||
|
||||
## Task 4: age keypair + Flux SOPS decryption [kubectl + flamingo]
|
||||
|
||||
Flux decrypts SOPS-encrypted secrets at apply time. It needs the age private key stored as a k8s Secret in the `flux-system` namespace.
|
||||
|
||||
- [ ] **Step 1: Verify age is installed**
|
||||
|
||||
```bash
|
||||
age --version || brew install age
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Generate age keypair**
|
||||
|
||||
```bash
|
||||
age-keygen -o /tmp/supervisor-age.key
|
||||
cat /tmp/supervisor-age.key
|
||||
```
|
||||
|
||||
Output includes two lines:
|
||||
```
|
||||
# public key: age1xxxxxx...
|
||||
AGE-SECRET-KEY-1xxxxxxx...
|
||||
```
|
||||
|
||||
**Copy the public key** (the `age1...` value) — you'll need it in Task 7 for encrypting secrets.
|
||||
**Store the private key file securely** — back it up outside the cluster (e.g., 1Password or encrypted note).
|
||||
|
||||
- [ ] **Step 3: Create the SOPS age secret in flux-system**
|
||||
|
||||
```bash
|
||||
kubectl create secret generic sops-age \
|
||||
--from-file=age.agekey=/tmp/supervisor-age.key \
|
||||
-n flux-system
|
||||
kubectl get secret sops-age -n flux-system
|
||||
```
|
||||
|
||||
Expected: secret exists with `age.agekey` key.
|
||||
|
||||
- [ ] **Step 4: Shred the temp key file**
|
||||
|
||||
```bash
|
||||
shred -u /tmp/supervisor-age.key
|
||||
```
|
||||
|
||||
- [ ] **Step 5: Check what Flux Kustomization CRDs exist in the infra repo**
|
||||
|
||||
```bash
|
||||
git clone git@gitea.d-ma.be:mathias/infra.git /tmp/infra-sops-setup
|
||||
ls /tmp/infra-sops-setup/flux-system/
|
||||
```
|
||||
|
||||
Look for a `kustomization.yaml` or `gotk-sync.yaml` that defines the main Flux Kustomization resource pointing at the `clusters/koala/` path.
|
||||
|
||||
- [ ] **Step 6: Patch the Flux Kustomization to enable SOPS decryption**
|
||||
|
||||
Find the Kustomization resource that syncs `clusters/koala/`. It will look like:
|
||||
|
||||
```yaml
|
||||
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
||||
kind: Kustomization
|
||||
metadata:
|
||||
name: flux-system
|
||||
namespace: flux-system
|
||||
spec:
|
||||
path: ./clusters/koala
|
||||
...
|
||||
```
|
||||
|
||||
Add the `decryption` block:
|
||||
|
||||
```yaml
|
||||
decryption:
|
||||
provider: sops
|
||||
secretRef:
|
||||
name: sops-age
|
||||
```
|
||||
|
||||
Edit the file in `/tmp/infra-sops-setup/flux-system/` and commit:
|
||||
|
||||
```bash
|
||||
cd /tmp/infra-sops-setup
|
||||
# Edit the relevant Kustomization yaml to add decryption block (shown above)
|
||||
git add flux-system/
|
||||
git commit -m "feat: enable SOPS decryption via age key in flux-system"
|
||||
git push
|
||||
```
|
||||
|
||||
- [ ] **Step 7: Verify Flux picks up the change**
|
||||
|
||||
```bash
|
||||
flux reconcile source git flux-system
|
||||
flux get kustomizations
|
||||
```
|
||||
|
||||
Expected: `flux-system` Kustomization shows `Ready True` with no errors.
|
||||
|
||||
- [ ] **Step 8: Clean up temp clone**
|
||||
|
||||
```bash
|
||||
rm -rf /tmp/infra-sops-setup
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Task 5: Infra repo — supervisor app manifests [infra-repo]
|
||||
|
||||
Create the full k8s manifest set for the supervisor service in the infra repo. The deployment uses an `IMAGE_TAG` placeholder; the CD job patches this with the actual git sha before pushing.
|
||||
|
||||
**Prerequisites:** age public key from Task 4 Step 2.
|
||||
|
||||
- [ ] **Step 1: Clone the infra repo**
|
||||
|
||||
```bash
|
||||
git clone git@gitea.d-ma.be:mathias/infra.git /tmp/infra-supervisor
|
||||
cd /tmp/infra-supervisor
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Create namespace**
|
||||
|
||||
```bash
|
||||
mkdir -p apps/supervisor
|
||||
cat > apps/supervisor/namespace.yaml << 'EOF'
|
||||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: supervisor
|
||||
EOF
|
||||
```
|
||||
|
||||
- [ ] **Step 3: Create deployment**
|
||||
|
||||
The `brain` volume is a `hostPath` on koala (simplest for a single-node service; add a PVC later if needed). The pod spec references the `gitea-registry` secret via `imagePullSecrets` so the kubelet can pull the image from the Gitea registry.
|
||||
|
||||
```bash
|
||||
cat > apps/supervisor/deployment.yaml << 'EOF'
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: supervisor
|
||||
namespace: supervisor
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: supervisor
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: supervisor
|
||||
spec:
|
||||
nodeSelector:
|
||||
kubernetes.io/hostname: koala
|
||||
imagePullSecrets:
|
||||
- name: gitea-registry
|
||||
containers:
|
||||
- name: supervisor
|
||||
image: gitea.d-ma.be/mathias/supervisor:IMAGE_TAG
|
||||
ports:
|
||||
- containerPort: 3200
|
||||
envFrom:
|
||||
- secretRef:
|
||||
name: supervisor-secrets
|
||||
env:
|
||||
- name: SUPERVISOR_PORT
|
||||
value: "3200"
|
||||
- name: LITELLM_BASE_URL
|
||||
value: "http://iguana:4000"
|
||||
- name: LLAMA_SWAP_URL
|
||||
value: "http://koala:8080"
|
||||
- name: INGEST_BASE_URL
|
||||
value: "http://localhost:3300"
|
||||
volumeMounts:
|
||||
- name: brain
|
||||
mountPath: /app/brain
|
||||
volumes:
|
||||
- name: brain
|
||||
hostPath:
|
||||
path: /var/lib/supervisor/brain
|
||||
type: DirectoryOrCreate
|
||||
EOF
|
||||
```
|
||||
|
||||
- [ ] **Step 4: Create service**
|
||||
|
||||
```bash
|
||||
cat > apps/supervisor/service.yaml << 'EOF'
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: supervisor
|
||||
namespace: supervisor
|
||||
spec:
|
||||
selector:
|
||||
app: supervisor
|
||||
ports:
|
||||
- port: 3200
|
||||
targetPort: 3200
|
||||
type: ClusterIP
|
||||
EOF
|
||||
```
|
||||
|
||||
- [ ] **Step 5: Create kustomization.yaml for supervisor**
|
||||
|
||||
```bash
|
||||
cat > apps/supervisor/kustomization.yaml << 'EOF'
|
||||
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||
kind: Kustomization
|
||||
resources:
|
||||
- namespace.yaml
|
||||
- deployment.yaml
|
||||
- service.yaml
|
||||
- secrets.enc.yaml
|
||||
EOF
|
||||
```
|
||||
|
||||
- [ ] **Step 6: Ensure clusters/koala/kustomization.yaml exists and includes supervisor**
|
||||
|
||||
Check if the file exists:
|
||||
|
||||
```bash
|
||||
cat clusters/koala/kustomization.yaml 2>/dev/null || echo "need to create"
|
||||
```
|
||||
|
||||
If it exists, add supervisor and imagepullsecret resources. If it does not exist, create it:
|
||||
|
||||
```bash
|
||||
cat > clusters/koala/kustomization.yaml << 'EOF'
|
||||
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||
kind: Kustomization
|
||||
resources:
|
||||
- ../../apps/imagepullsecret
|
||||
- ../../apps/supervisor
|
||||
EOF
|
||||
```
|
||||
|
||||
If it already exists, add the two resource lines (preserving existing entries).
|
||||
|
||||
- [ ] **Step 7: Commit (without secrets — those come in Task 6)**
|
||||
|
||||
```bash
|
||||
cd /tmp/infra-supervisor
|
||||
git add apps/supervisor/ clusters/koala/
|
||||
git commit -m "feat(supervisor): add k8s manifests for supervisor service"
|
||||
git push
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Task 6: SOPS-encrypted secrets in infra repo [infra-repo + flamingo]
|
||||
|
||||
Two encrypted secret files: the imagePullSecret for the Gitea container registry, and the supervisor app secrets (ANTHROPIC_API_KEY, LITELLM_API_KEY).
|
||||
|
||||
**Prerequisites:**
|
||||
- age public key from Task 4 Step 2 (format: `age1xxxxx...`)
|
||||
- `sops` installed (`brew install sops` if missing)
|
||||
- Gitea registry token (same one used in Task 3, or create a read-only one for pulling)
|
||||
|
||||
- [ ] **Step 1: Verify sops is installed**
|
||||
|
||||
```bash
|
||||
sops --version || brew install sops
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Create .sops.yaml in infra repo root**
|
||||
|
||||
This tells sops which key to use for all files in the repo:
|
||||
|
||||
```bash
|
||||
cd /tmp/infra-supervisor
|
||||
cat > .sops.yaml << 'EOF'
|
||||
creation_rules:
|
||||
- age: age1REPLACE_WITH_YOUR_PUBLIC_KEY
|
||||
EOF
|
||||
git add .sops.yaml
|
||||
git commit -m "chore: add sops config (age key)"
|
||||
git push
|
||||
```
|
||||
|
||||
Replace `age1REPLACE_WITH_YOUR_PUBLIC_KEY` with the actual age public key from Task 4.
|
||||
|
||||
- [ ] **Step 3: Create and encrypt the imagePullSecret**
|
||||
|
||||
The imagePullSecret is created in the `supervisor` namespace for now (if other namespaces need it later, it can be re-targeted per namespace via Kustomize). Create it in the `imagepullsecret` app:
|
||||
|
||||
```bash
|
||||
mkdir -p apps/imagepullsecret
|
||||
|
||||
# Create a registry pull token in Gitea: Settings → Applications → Generate Token
|
||||
# Scopes: read:packages
|
||||
# Use that token here (or reuse the buildkit-push token — read access is enough for pulling)
|
||||
PULL_TOKEN=<gitea-read-packages-token>
|
||||
PULL_AUTH=$(echo -n "mathias:${PULL_TOKEN}" | base64)
|
||||
|
||||
cat > /tmp/gitea-pull-secret.yaml << EOF
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: gitea-registry
|
||||
namespace: supervisor
|
||||
type: kubernetes.io/dockerconfigjson
|
||||
stringData:
|
||||
.dockerconfigjson: |
|
||||
{
|
||||
"auths": {
|
||||
"gitea.d-ma.be": {
|
||||
"auth": "${PULL_AUTH}"
|
||||
}
|
||||
}
|
||||
}
|
||||
EOF
|
||||
|
||||
sops --encrypt /tmp/gitea-pull-secret.yaml > apps/imagepullsecret/secret.enc.yaml
|
||||
rm /tmp/gitea-pull-secret.yaml
|
||||
|
||||
cat > apps/imagepullsecret/kustomization.yaml << 'EOF'
|
||||
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||
kind: Kustomization
|
||||
resources:
|
||||
- secret.enc.yaml
|
||||
EOF
|
||||
```
|
||||
|
||||
Verify the encrypted file looks correct (should show `sops:` metadata at the bottom):
|
||||
|
||||
```bash
|
||||
tail -20 apps/imagepullsecret/secret.enc.yaml
|
||||
```
|
||||
|
||||
- [ ] **Step 4: Create and encrypt supervisor app secrets**
|
||||
|
||||
```bash
|
||||
# ANTHROPIC_API_KEY: your Anthropic API key
|
||||
# LITELLM_API_KEY: the key your LiteLLM instance expects (can be any string if it's local)
|
||||
cat > /tmp/supervisor-secrets.yaml << 'EOF'
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: supervisor-secrets
|
||||
namespace: supervisor
|
||||
type: Opaque
|
||||
stringData:
|
||||
ANTHROPIC_API_KEY: "REPLACE_WITH_REAL_KEY"
|
||||
LITELLM_API_KEY: "REPLACE_WITH_REAL_KEY"
|
||||
EOF
|
||||
|
||||
# Edit /tmp/supervisor-secrets.yaml to insert real values, then:
|
||||
sops --encrypt /tmp/supervisor-secrets.yaml > apps/supervisor/secrets.enc.yaml
|
||||
rm /tmp/supervisor-secrets.yaml
|
||||
```
|
||||
|
||||
Verify:
|
||||
|
||||
```bash
|
||||
tail -20 apps/supervisor/secrets.enc.yaml
|
||||
# Should show encrypted values and sops metadata
|
||||
```
|
||||
|
||||
- [ ] **Step 5: Commit encrypted secrets**
|
||||
|
||||
```bash
|
||||
cd /tmp/infra-supervisor
|
||||
git add apps/imagepullsecret/ apps/supervisor/secrets.enc.yaml .sops.yaml
|
||||
git commit -m "feat: add SOPS-encrypted imagePullSecret and supervisor app secrets"
|
||||
git push
|
||||
```
|
||||
|
||||
- [ ] **Step 6: Verify Flux reconciles and creates the secrets**
|
||||
|
||||
Wait ~60s then:
|
||||
|
||||
```bash
|
||||
flux reconcile kustomization flux-system --with-source
|
||||
kubectl get secrets -n supervisor
|
||||
```
|
||||
|
||||
Expected: `gitea-registry` and `supervisor-secrets` appear in the `supervisor` namespace.
|
||||
|
||||
- [ ] **Step 7: Clean up temp clone**
|
||||
|
||||
```bash
|
||||
rm -rf /tmp/infra-supervisor
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Task 7: Gitea org-level secrets [gitea-ui + koala-ssh]
|
||||
|
||||
Set the two secrets that all repos in the `mathias` org will inherit. These go in the Gitea org (not individual repos).
|
||||
|
||||
**Files:** No files — Gitea UI configuration.
|
||||
|
||||
- [ ] **Step 1: Generate SSH deploy key for infra repo**
|
||||
|
||||
On flamingo:
|
||||
|
||||
```bash
|
||||
ssh-keygen -t ed25519 -C "cd-bot infra deploy key" -f /tmp/infra-deploy-key -N ""
|
||||
cat /tmp/infra-deploy-key # private key → INFRA_DEPLOY_KEY secret
|
||||
cat /tmp/infra-deploy-key.pub # public key → add to Gitea infra repo as deploy key
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Add public key to infra repo as a deploy key (write access)**
|
||||
|
||||
In Gitea UI: `https://gitea.d-ma.be/mathias/infra` → Settings → Deploy Keys → Add Deploy Key.
|
||||
- Title: `cd-bot`
|
||||
- Key: paste content of `/tmp/infra-deploy-key.pub`
|
||||
- Enable write access: ✓
|
||||
|
||||
- [ ] **Step 3: Set org-level secrets in Gitea**
|
||||
|
||||
In Gitea UI: `https://gitea.d-ma.be/org/mathias/settings/secrets` → Add Secret.
|
||||
|
||||
Set these two secrets:
|
||||
|
||||
| Secret name | Value |
|
||||
|-------------|-------|
|
||||
| `INFRA_DEPLOY_KEY` | content of `/tmp/infra-deploy-key` (private key, including `-----BEGIN...` lines) |
|
||||
| `BUILDKIT_REGISTRY_AUTH` | same base64 auth string as used in Task 3 Step 2 (format: `mathias:<token>` base64-encoded) |
|
||||
|
||||
Note: `BUILDKIT_REGISTRY_AUTH` is redundant if `/root/.docker/config.json` is already on the runner host from Task 3 — but setting it as a secret allows the `cd.yml` to explicitly pass it to `buildctl` for clarity and rotation.
|
||||
|
||||
- [ ] **Step 4: Clean up temp key files**
|
||||
|
||||
```bash
|
||||
shred -u /tmp/infra-deploy-key /tmp/infra-deploy-key.pub
|
||||
```
|
||||
|
||||
- [ ] **Step 5: Verify secrets appear in Gitea**
|
||||
|
||||
In Gitea UI: `https://gitea.d-ma.be/org/mathias/settings/secrets` — confirm both secrets are listed (values are hidden, only names shown).
|
||||
|
||||
---
|
||||
|
||||
## Task 8: cd.yml workflow [this-repo]
|
||||
|
||||
Create the CD workflow that triggers after CI passes, builds the image with buildctl, and commits the updated tag to the infra repo.
|
||||
|
||||
**Files:**
|
||||
- Create: `.gitea/workflows/cd.yml`
|
||||
|
||||
- [ ] **Step 1: Create cd.yml**
|
||||
|
||||
```bash
|
||||
cat > .gitea/workflows/cd.yml << 'EOF'
|
||||
name: cd
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
|
||||
jobs:
|
||||
deploy:
|
||||
name: Build and deploy
|
||||
needs: [check] # 'check' is the job name in ci.yml
|
||||
runs-on: self-hosted
|
||||
env:
|
||||
SERVICE: supervisor
|
||||
REGISTRY: gitea.d-ma.be
|
||||
IMAGE: gitea.d-ma.be/mathias/supervisor
|
||||
INFRA_REPO: git@gitea.d-ma.be:mathias/infra.git
|
||||
BUILDKIT_HOST: unix:///run/buildkit/buildkitd.sock
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Build and push image
|
||||
run: |
|
||||
IMAGE_TAG="${{ github.sha }}"
|
||||
echo "Building ${IMAGE}:${IMAGE_TAG}"
|
||||
buildctl --addr "${BUILDKIT_HOST}" build \
|
||||
--frontend dockerfile.v0 \
|
||||
--local context=. \
|
||||
--local dockerfile=. \
|
||||
--opt build-arg:VERSION="${IMAGE_TAG}" \
|
||||
--output "type=image,name=${IMAGE}:${IMAGE_TAG},push=true"
|
||||
echo "IMAGE_TAG=${IMAGE_TAG}" >> $GITHUB_OUTPUT
|
||||
id: build
|
||||
|
||||
- name: Update infra repo
|
||||
run: |
|
||||
IMAGE_TAG="${{ github.sha }}"
|
||||
# Write SSH key for infra repo
|
||||
mkdir -p ~/.ssh
|
||||
echo "${{ secrets.INFRA_DEPLOY_KEY }}" > ~/.ssh/infra_deploy_key
|
||||
chmod 600 ~/.ssh/infra_deploy_key
|
||||
ssh-keyscan gitea.d-ma.be >> ~/.ssh/known_hosts 2>/dev/null
|
||||
|
||||
# Clone infra repo
|
||||
GIT_SSH_COMMAND="ssh -i ~/.ssh/infra_deploy_key -o IdentitiesOnly=yes" \
|
||||
git clone "${INFRA_REPO}" /tmp/infra-update
|
||||
|
||||
# Patch the image tag
|
||||
cd /tmp/infra-update
|
||||
sed -i "s|gitea.d-ma.be/mathias/supervisor:.*|gitea.d-ma.be/mathias/supervisor:${IMAGE_TAG}|" \
|
||||
"apps/${SERVICE}/deployment.yaml"
|
||||
|
||||
# Commit and push
|
||||
git config user.email "cd-bot@d-ma.be"
|
||||
git config user.name "CD Bot"
|
||||
git add "apps/${SERVICE}/deployment.yaml"
|
||||
git commit -m "chore(deploy): ${SERVICE} → ${IMAGE_TAG}"
|
||||
GIT_SSH_COMMAND="ssh -i ~/.ssh/infra_deploy_key -o IdentitiesOnly=yes" \
|
||||
git push
|
||||
|
||||
# Clean up
|
||||
rm -rf /tmp/infra-update
|
||||
rm ~/.ssh/infra_deploy_key
|
||||
echo "Infra repo updated: ${SERVICE} → ${IMAGE_TAG}"
|
||||
EOF
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Verify the `needs` job name matches ci.yml**
|
||||
|
||||
```bash
|
||||
grep "^ [a-z].*:$" .gitea/workflows/ci.yml
|
||||
```
|
||||
|
||||
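The output should show `check:` as the quality-gate job name. The `cd.yml` uses `needs: [check]` — confirm this matches. Note that `needs` only resolves jobs defined in the same workflow file; if `check` exists only in `ci.yml`, the `deploy` job cannot be gated on it this way and the dependency must be expressed differently (for example, by moving the deploy job into `ci.yml`).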
|
||||
|
||||
- [ ] **Step 3: Commit**
|
||||
|
||||
```bash
|
||||
git add .gitea/workflows/cd.yml
|
||||
git commit -m "feat: add CD workflow (buildctl → Gitea registry → infra repo update)"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Task 9: End-to-end smoke test
|
||||
|
||||
Trigger the full pipeline and verify each stage.
|
||||
|
||||
- [ ] **Step 1: Push to main to trigger CI + CD**
|
||||
|
||||
```bash
|
||||
git push origin main
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Monitor CI job in Gitea**
|
||||
|
||||
Open `https://gitea.d-ma.be/mathias/supervisor/actions` — wait for the `ci` workflow `check` job to pass.
|
||||
|
||||
- [ ] **Step 3: Monitor CD job**
|
||||
|
||||
In the same actions view, the `cd` workflow should start after `ci` passes. Check the `Build and push image` step output for:
|
||||
|
||||
```
|
||||
Building gitea.d-ma.be/mathias/supervisor:<sha>
|
||||
```
|
||||
|
||||
And the `Update infra repo` step for:
|
||||
|
||||
```
|
||||
Infra repo updated: supervisor → <sha>
|
||||
```
|
||||
|
||||
- [ ] **Step 4: Verify image in Gitea registry**
|
||||
|
||||
```
|
||||
https://gitea.d-ma.be/mathias/supervisor/packages
|
||||
```
|
||||
|
||||
Should show a new tag matching the commit sha.
|
||||
|
||||
- [ ] **Step 5: Verify infra repo commit**
|
||||
|
||||
```bash
|
||||
git clone git@gitea.d-ma.be:mathias/infra.git /tmp/infra-verify
|
||||
cd /tmp/infra-verify
|
||||
git log --oneline -3
|
||||
```
|
||||
|
||||
Expected: most recent commit message is `chore(deploy): supervisor → <sha>`.
|
||||
|
||||
```bash
|
||||
grep "image:" apps/supervisor/deployment.yaml
|
||||
```
|
||||
|
||||
Expected: `image: gitea.d-ma.be/mathias/supervisor:<sha>`
|
||||
|
||||
- [ ] **Step 6: Verify Flux reconciles**
|
||||
|
||||
```bash
|
||||
flux get kustomizations
|
||||
```
|
||||
|
||||
Expected: `flux-system` shows `Ready True` and `Applied revision: main/<infra-sha>`.
|
||||
|
||||
```bash
|
||||
kubectl get pods -n supervisor
|
||||
```
|
||||
|
||||
Expected: supervisor pod is `Running` with the new image sha.
|
||||
|
||||
- [ ] **Step 7: Verify pod started correctly**
|
||||
|
||||
```bash
|
||||
kubectl logs -n supervisor deployment/supervisor --tail=20
|
||||
```
|
||||
|
||||
Expected: supervisor startup logs (MCP server listening on port 3200, no errors).
|
||||
|
||||
- [ ] **Step 8: Clean up verify clone**
|
||||
|
||||
```bash
|
||||
rm -rf /tmp/infra-verify
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Task 10: Post-deploy — registry retention policy [gitea-ui]
|
||||
|
||||
Prevent the Gitea container registry from filling up by setting a tag retention policy.
|
||||
|
||||
- [ ] **Step 1: Set tag retention in Gitea**
|
||||
|
||||
In Gitea UI: `https://gitea.d-ma.be/mathias/supervisor` → Settings → Packages → Container Registry.
|
||||
|
||||
Set: Keep last **20** tags per image name.
|
||||
|
||||
If Gitea does not expose a UI retention policy, note this for manual cleanup and open a task to automate it (e.g., a weekly Actions job that deletes old image tags via the Gitea packages API).
|
||||
|
||||
- [ ] **Step 2: Verify existing test tags are cleaned up**
|
||||
|
||||
Manually delete any test tags pushed during Task 3 if not already done.
|
||||
|
||||
---
|
||||
|
||||
## Self-review checklist (for plan author — not a task)
|
||||
|
||||
- [x] **Spec coverage:** BuildKit systemd ✓, cd.yml ✓, Flux SOPS ✓, infra repo structure ✓, imagePullSecret ✓, app secrets ✓, Gitea org secrets ✓, error handling (implicit in workflow failures) ✓, registry retention ✓, smoke test ✓
|
||||
- [x] **Placeholders:** `REPLACE_WITH_YOUR_PUBLIC_KEY` and `REPLACE_WITH_REAL_KEY` are intentional — real values come from user's secrets; marked clearly
|
||||
- [x] **Type consistency:** No shared types across tasks (infra-only plan)
|
||||
- [x] **Known gaps:** `needs: [check]` assumes ci.yml job name is `check` — verified in Task 8 Step 2. The `sed` image tag patch assumes no other image line in deployment.yaml — the deployment template only has one `image:` line.
|
||||
1617
docs/superpowers/plans/2026-04-20-model-orchestration-plan.md
Normal file
1617
docs/superpowers/plans/2026-04-20-model-orchestration-plan.md
Normal file
File diff suppressed because it is too large
Load Diff
1073
docs/superpowers/plans/2026-04-22-phase4-attempt-wiring.md
Normal file
1073
docs/superpowers/plans/2026-04-22-phase4-attempt-wiring.md
Normal file
File diff suppressed because it is too large
Load Diff
218
docs/superpowers/specs/2026-04-20-cd-pipeline-design.md
Normal file
218
docs/superpowers/specs/2026-04-20-cd-pipeline-design.md
Normal file
@@ -0,0 +1,218 @@
|
||||
# CD Pipeline Design
|
||||
|
||||
**Date:** 2026-04-20
|
||||
**Status:** Approved for implementation
|
||||
|
||||
## Problem statement
|
||||
|
||||
The supervisor (and future services on the koala k3s cluster) have no automated deployment path after CI passes. Images are not built, the cluster is updated manually, and there is no audit trail for what is running where.
|
||||
|
||||
## Goal
|
||||
|
||||
After a push to `main` passes CI, automatically build a container image, push it to the Gitea registry, and update the cluster via GitOps — with a design that scales to many repos and services without per-repo kubeconfig or secret sprawl.
|
||||
|
||||
## Success criteria
|
||||
|
||||
- [ ] Successful `main` push triggers image build and push to `gitea.d-ma.be/<org>/<repo>:<git-sha>`
|
||||
- [ ] Infra repo receives a commit updating the image tag for the deployed service
|
||||
- [ ] Flux reconciles within 60s of the infra repo commit; pod runs the new image
|
||||
- [ ] Rollback = one commit to infra repo reverting the tag
|
||||
- [ ] Secrets (app secrets, registry pull) are SOPS-encrypted in infra repo; no manual `kubectl create secret`
|
||||
- [ ] Adding a new service requires only: adding `apps/<service>/` to infra repo + `cd.yml` to the app repo
|
||||
- [ ] Zero changes to the k3s cluster networking or runner configuration
|
||||
|
||||
## Constraints
|
||||
|
||||
- Gitea Actions self-hosted runner runs as a **systemd host process** on koala — not a k8s pod; cannot use cluster DNS
|
||||
- k3s uses containerd; no Docker daemon, no nerdctl on koala
|
||||
- Flux is already running (core controllers only); image-reflector/image-automation are NOT installed and will NOT be added
|
||||
- SOPS + age is the secret management standard; no plaintext Secrets in git
|
||||
- All org-level Gitea secrets are shared across repos — minimize the set
|
||||
|
||||
## Out of scope
|
||||
|
||||
- Multi-cluster promotion (koala only for now; infra repo structure supports adding clusters later)
|
||||
- Automated rollback on health check failure (manual rollback via infra repo commit)
|
||||
- Build caching beyond BuildKit's local disk cache
|
||||
- PR preview environments
|
||||
|
||||
---
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
App repo (supervisor, n8n, etc.)
|
||||
↓ push to main
|
||||
Gitea Actions — ci.yml (lint + test)
|
||||
↓ passes
|
||||
Gitea Actions — cd.yml
|
||||
├─ 1. buildctl → BuildKit (unix socket on koala host)
|
||||
│ → pushes gitea.d-ma.be/<org>/<repo>:<git-sha>
|
||||
├─ 2. Clone infra repo (SSH deploy key)
|
||||
│ → patch apps/<service>/deployment.yaml IMAGE_TAG → <git-sha>
|
||||
│ → git commit + push
|
||||
└─ done
|
||||
|
||||
gitea.d-ma.be/mathias/infra (Flux source)
|
||||
↓ Flux source-controller detects new commit (30s interval)
|
||||
kustomize-controller
|
||||
└─ applies apps/<service>/kustomization.yaml → k3s namespace
|
||||
↓
|
||||
pod runs new image (pulls from gitea.d-ma.be with imagePullSecret)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Components
|
||||
|
||||
### 1. BuildKit — systemd service on koala
|
||||
|
||||
BuildKit runs as a systemd service on the koala host (rootless or root — the socket path below depends on which mode is chosen), following the same pattern as the Gitea runner already in use.
|
||||
|
||||
- Socket: `unix:///run/user/<uid>/buildkit/buildkitd.sock` (rootless) or `/run/buildkit/buildkitd.sock` (root)
|
||||
- Cache: local disk at default BuildKit cache path — persists across builds
|
||||
- Access: `buildctl --addr unix:///run/buildkit/buildkitd.sock` from the runner process (same host, same user)
|
||||
- No k3s involvement for builds
|
||||
|
||||
### 2. Gitea Actions — `cd.yml`
|
||||
|
||||
Separate workflow file; triggers on `main` push after `ci.yml` succeeds.
|
||||
|
||||
```yaml
|
||||
name: cd
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
|
||||
jobs:
|
||||
deploy:
|
||||
needs: [ci] # or workflow_run trigger — see implementation plan
|
||||
runs-on: [self-hosted, koala]
|
||||
env:
|
||||
IMAGE: gitea.d-ma.be/${{ github.repository }}:${{ github.sha }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Build and push
|
||||
run: |
|
||||
buildctl --addr unix:///run/buildkit/buildkitd.sock \
|
||||
build \
|
||||
--frontend dockerfile.v0 \
|
||||
--local context=. \
|
||||
--local dockerfile=. \
|
||||
--output type=image,name=$IMAGE,push=true
|
||||
env:
|
||||
BUILDKIT_HOST: unix:///run/buildkit/buildkitd.sock
|
||||
- name: Update infra repo
|
||||
run: |
|
||||
git clone git@gitea.d-ma.be:mathias/infra.git /tmp/infra
|
||||
cd /tmp/infra
|
||||
sed -i "s|IMAGE_TAG|${{ github.sha }}|g" apps/${{ env.SERVICE_NAME }}/deployment.yaml
|
||||
git config user.email "cd-bot@d-ma.be"
|
||||
git config user.name "CD Bot"
|
||||
git add apps/${{ env.SERVICE_NAME }}/deployment.yaml
|
||||
git commit -m "chore(deploy): ${{ env.SERVICE_NAME }} → ${{ github.sha }}"
|
||||
git push
|
||||
env:
|
||||
GIT_SSH_COMMAND: ssh -i /tmp/infra-deploy-key -o StrictHostKeyChecking=no
|
||||
```
|
||||
|
||||
`SERVICE_NAME` is set per-repo (either hardcoded in `cd.yml` or derived from the repo name).
|
||||
|
||||
### 3. Org-level Gitea secrets
|
||||
|
||||
Three secrets, set once, inherited by all repos:
|
||||
|
||||
| Secret | Purpose |
|
||||
|--------|---------|
|
||||
| `BUILDKIT_REGISTRY_AUTH` | credentials for pushing to `gitea.d-ma.be` (buildctl `--opt` or `~/.docker/config.json`) |
|
||||
| `INFRA_DEPLOY_KEY` | SSH private key with write access to `gitea.d-ma.be/mathias/infra` |
|
||||
| `KUBECONFIG_KOALA` | (optional) kubeconfig for manual `kubectl` steps if ever needed; scoped ServiceAccount |
|
||||
|
||||
### 4. Infra repo structure
|
||||
|
||||
```
|
||||
gitea.d-ma.be/mathias/infra
|
||||
├── clusters/
|
||||
│ └── koala/
|
||||
│ └── kustomization.yaml # points at ../../apps/*/
|
||||
├── apps/
|
||||
│ ├── supervisor/
|
||||
│ │ ├── namespace.yaml
|
||||
│ │ ├── deployment.yaml # image: gitea.d-ma.be/mathias/supervisor:IMAGE_TAG
|
||||
│ │ ├── service.yaml
|
||||
│ │ ├── secrets.enc.yaml # SOPS-encrypted app secrets (ANTHROPIC_API_KEY, etc.)
|
||||
│ │ └── kustomization.yaml
|
||||
│ ├── n8n/
|
||||
│ │ └── ...
|
||||
│ └── imagepullsecret/
|
||||
│ └── secret.enc.yaml # SOPS-encrypted imagePullSecret for gitea.d-ma.be
|
||||
└── flux-system/ # existing Flux bootstrap manifests
|
||||
```
|
||||
|
||||
Adding a new service = add `apps/<service>/` directory and reference it from `clusters/koala/kustomization.yaml`, which uses an explicit list (Kustomize `resources` entries are not expanded as glob patterns).
|
||||
|
||||
### 5. SOPS + age for Flux
|
||||
|
||||
Flux decrypts SOPS-encrypted files at apply time using an age key stored as a k8s Secret in the `flux-system` namespace. Setup:
|
||||
|
||||
1. Generate age keypair: `age-keygen`
|
||||
2. Store private key: `kubectl create secret generic sops-age --from-file=age.agekey -n flux-system`
|
||||
3. Configure Flux Kustomization with `decryption.provider: sops`
|
||||
4. Encrypt secrets before committing: `sops --encrypt --age <pubkey> secret.yaml > secret.enc.yaml`
|
||||
|
||||
App secrets (e.g., `ANTHROPIC_API_KEY`) and the registry pull secret live as encrypted files in `apps/<service>/` and `apps/imagepullsecret/` respectively.
|
||||
|
||||
### 6. Image pull secret
|
||||
|
||||
Each app namespace needs a `kubernetes.io/dockerconfigjson` Secret to pull from `gitea.d-ma.be`. This Secret is SOPS-encrypted in `apps/imagepullsecret/` and applied to each app namespace via Kustomize `namespace` field or a shared Kustomize component.
|
||||
|
||||
---
|
||||
|
||||
## Data flow: supervisor deploy
|
||||
|
||||
1. Push to `supervisor` main → CI passes (lint/test/vet)
|
||||
2. CD job builds image: `gitea.d-ma.be/mathias/supervisor:abc1234`
|
||||
3. CD job clones infra repo, patches `apps/supervisor/deployment.yaml`, commits
|
||||
4. Flux source-controller detects infra commit within 30s
|
||||
5. kustomize-controller applies `apps/supervisor/kustomization.yaml`
|
||||
6. Flux decrypts `secrets.enc.yaml` → k8s Secret in `supervisor` namespace
|
||||
7. k3s pulls `gitea.d-ma.be/mathias/supervisor:abc1234` using imagePullSecret
|
||||
8. Pod starts with new image; previous pod terminates
|
||||
|
||||
Rollback: `git revert <tag-commit>` in infra repo → Flux reconciles → old image deployed.
|
||||
|
||||
---
|
||||
|
||||
## Error handling
|
||||
|
||||
| Scenario | Behaviour |
|
||||
|----------|-----------|
|
||||
| CI fails | `cd.yml` does not run (`needs: ci` gate) |
|
||||
| BuildKit unreachable | `buildctl` exits non-zero → workflow fails; infra repo untouched |
|
||||
| Image push fails | Workflow fails; infra repo untouched; cluster unchanged |
|
||||
| Infra repo push conflict | Retry once with rebase; fail and alert if still conflicting |
|
||||
| Flux reconcile error | Notification-controller fires alert; pods stay on previous image |
|
||||
| Pod image pull fails | `ImagePullBackOff`; Flux reports degraded Kustomization |
|
||||
| SOPS decrypt fails | Kustomization fails; Flux reports error; no partial apply |
|
||||
|
||||
---
|
||||
|
||||
## Testing approach
|
||||
|
||||
1. **BuildKit smoke test** — `buildctl build` with a trivial one-line Dockerfile; verify image appears in Gitea registry
|
||||
2. **cd.yml dry run** — trigger manually on a test branch; verify infra repo commit contains correct sha
|
||||
3. **Flux reconcile test** — push infra commit; verify `flux get kustomizations` shows `Ready` and pod runs new image sha
|
||||
4. **Pull secret test** — delete pod, verify it restarts and pulls from Gitea registry without `ImagePullBackOff`
|
||||
5. **SOPS round-trip test** — encrypt a dummy secret, push to infra repo, verify Flux decrypts and `kubectl get secret` shows correct data
|
||||
|
||||
---
|
||||
|
||||
## Risks
|
||||
|
||||
| Risk | Mitigation |
|
||||
|------|------------|
|
||||
| BuildKit socket path varies by user/rootless mode | Confirm path during setup; hardcode in `cd.yml` |
|
||||
| Infra repo concurrent pushes (multiple repos deploying simultaneously) | Git rebase retry handles this; unlikely at current scale |
|
||||
| age private key lost | Back up to SOPS-accessible location; document recovery procedure |
|
||||
| Registry storage fills up | Set Gitea registry tag retention policy (keep last 20 per repo) |
|
||||
| Gitea deploy key compromised | Rotate via Gitea UI; single key for infra repo only |
|
||||
34
ingestion/Dockerfile
Normal file
34
ingestion/Dockerfile
Normal file
@@ -0,0 +1,34 @@
|
||||
# syntax=docker/dockerfile:1
|
||||
|
||||
FROM golang:1.26-bookworm AS builder
|
||||
|
||||
ARG VERSION=dev
|
||||
WORKDIR /src
|
||||
|
||||
COPY go.mod go.sum ./
|
||||
RUN go mod download
|
||||
|
||||
COPY . .
|
||||
RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 \
|
||||
go build -trimpath -ldflags="-s -w" \
|
||||
-o /out/ingestion ./cmd/server
|
||||
|
||||
FROM alpine:3.21
|
||||
|
||||
COPY --from=builder /out/ingestion /usr/local/bin/ingestion
|
||||
|
||||
RUN addgroup -S ingestion && adduser -S -G ingestion ingestion
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# brain/ is writable state — mount a PersistentVolume here
|
||||
VOLUME /app/brain
|
||||
|
||||
ENV INGEST_BRAIN_DIR=/app/brain
|
||||
ENV INGEST_PORT=3300
|
||||
|
||||
USER ingestion
|
||||
|
||||
EXPOSE 3300
|
||||
|
||||
ENTRYPOINT ["/usr/local/bin/ingestion"]
|
||||
@@ -79,7 +79,7 @@ func (h *Handler) Write(w http.ResponseWriter, r *http.Request) {
|
||||
filename = fmt.Sprintf("%s-auto.md", time.Now().UTC().Format("2006-01-02-150405"))
|
||||
}
|
||||
|
||||
rawDir := filepath.Join(h.brainDir, "raw")
|
||||
rawDir := filepath.Join(h.brainDir, "knowledge")
|
||||
if err := os.MkdirAll(rawDir, 0o755); err != nil {
|
||||
http.Error(w, "failed to create raw dir", http.StatusInternalServerError)
|
||||
return
|
||||
@@ -99,7 +99,11 @@ func (h *Handler) Write(w http.ResponseWriter, r *http.Request) {
|
||||
finalContent = fm.String() + req.Content
|
||||
}
|
||||
|
||||
dest := filepath.Join(rawDir, filepath.Base(filename))
|
||||
base := filepath.Base(filename)
|
||||
if !strings.HasSuffix(base, ".md") {
|
||||
base += ".md"
|
||||
}
|
||||
dest := filepath.Join(rawDir, base)
|
||||
if err := os.WriteFile(dest, []byte(finalContent), 0o644); err != nil {
|
||||
h.logger.Error("write failed", "err", err)
|
||||
http.Error(w, "write error", http.StatusInternalServerError)
|
||||
|
||||
@@ -20,10 +20,9 @@ import (
|
||||
func setup(t *testing.T) (string, *api.Handler) {
|
||||
t.Helper()
|
||||
dir := t.TempDir()
|
||||
require.NoError(t, os.MkdirAll(filepath.Join(dir, "wiki", "concepts"), 0o755))
|
||||
require.NoError(t, os.MkdirAll(filepath.Join(dir, "raw"), 0o755))
|
||||
require.NoError(t, os.MkdirAll(filepath.Join(dir, "knowledge"), 0o755))
|
||||
require.NoError(t, os.WriteFile(
|
||||
filepath.Join(dir, "wiki", "concepts", "tdd.md"),
|
||||
filepath.Join(dir, "knowledge", "tdd.md"),
|
||||
[]byte("---\ntitle: TDD\ndomain: software\n---\n\nTest-driven development is a discipline.\n"),
|
||||
0o644,
|
||||
))
|
||||
@@ -46,7 +45,7 @@ func TestQuery_ReturnsResults(t *testing.T) {
|
||||
assert.NotEmpty(t, results)
|
||||
}
|
||||
|
||||
func TestWrite_CreatesRawFile(t *testing.T) {
|
||||
func TestWrite_CreatesKnowledgeFile(t *testing.T) {
|
||||
dir, h := setup(t)
|
||||
body, _ := json.Marshal(map[string]any{
|
||||
"content": "# Test note\n\nSome content.",
|
||||
@@ -62,8 +61,7 @@ func TestWrite_CreatesRawFile(t *testing.T) {
|
||||
require.NoError(t, json.Unmarshal(rec.Body.Bytes(), &resp))
|
||||
assert.NotEmpty(t, resp["path"])
|
||||
|
||||
written := filepath.Join(dir, "raw", "test-note.md")
|
||||
content, err := os.ReadFile(written)
|
||||
content, err := os.ReadFile(filepath.Join(dir, "knowledge", "test-note.md"))
|
||||
require.NoError(t, err)
|
||||
assert.Contains(t, string(content), "Some content.")
|
||||
}
|
||||
@@ -93,7 +91,7 @@ func TestWrite_IncludesFrontmatterWhenTypeProvided(t *testing.T) {
|
||||
h.Write(rec, req)
|
||||
|
||||
assert.Equal(t, http.StatusOK, rec.Code)
|
||||
content, err := os.ReadFile(filepath.Join(dir, "raw", "typed-note.md"))
|
||||
content, err := os.ReadFile(filepath.Join(dir, "knowledge", "typed-note.md"))
|
||||
require.NoError(t, err)
|
||||
assert.Contains(t, string(content), "type: concept")
|
||||
assert.Contains(t, string(content), "domain: software")
|
||||
@@ -109,7 +107,8 @@ func TestWrite_GeneratesFilenameIfAbsent(t *testing.T) {
|
||||
h.Write(rec, req)
|
||||
|
||||
assert.Equal(t, http.StatusOK, rec.Code)
|
||||
entries, _ := os.ReadDir(filepath.Join(dir, "raw"))
|
||||
assert.Len(t, entries, 1)
|
||||
assert.True(t, strings.HasSuffix(entries[0].Name(), ".md"))
|
||||
entries, _ := os.ReadDir(filepath.Join(dir, "knowledge"))
|
||||
// +1 because setup already wrote tdd.md
|
||||
assert.Len(t, entries, 2)
|
||||
assert.True(t, strings.HasSuffix(entries[1].Name(), ".md"))
|
||||
}
|
||||
|
||||
@@ -33,7 +33,7 @@ func Query(brainDir, query string, limit int) ([]Result, error) {
|
||||
|
||||
var results []Result
|
||||
|
||||
err := filepath.WalkDir(filepath.Join(brainDir, "wiki"), func(path string, d os.DirEntry, err error) error {
|
||||
err := filepath.WalkDir(filepath.Join(brainDir, "knowledge"), func(path string, d os.DirEntry, err error) error {
|
||||
if err != nil {
|
||||
slog.Warn("search: skipping path", "path", path, "err", err)
|
||||
return nil
|
||||
|
||||
@@ -14,17 +14,15 @@ import (
|
||||
|
||||
func TestSearch_ReturnsMatchingPages(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
require.NoError(t, os.MkdirAll(filepath.Join(dir, "wiki", "concepts"), 0o755))
|
||||
require.NoError(t, os.MkdirAll(filepath.Join(dir, "knowledge"), 0o755))
|
||||
|
||||
// Write a concept page mentioning "retry"
|
||||
require.NoError(t, os.WriteFile(
|
||||
filepath.Join(dir, "wiki", "concepts", "retry-logic.md"),
|
||||
filepath.Join(dir, "knowledge", "retry-logic.md"),
|
||||
[]byte("---\ntitle: Retry Logic\ndomain: software\n---\n\nRetry logic handles transient failures by re-attempting operations.\n"),
|
||||
0o644,
|
||||
))
|
||||
// Write an unrelated page
|
||||
require.NoError(t, os.WriteFile(
|
||||
filepath.Join(dir, "wiki", "concepts", "database.md"),
|
||||
filepath.Join(dir, "knowledge", "database.md"),
|
||||
[]byte("---\ntitle: Database\ndomain: software\n---\n\nA database stores structured data.\n"),
|
||||
0o644,
|
||||
))
|
||||
@@ -32,7 +30,7 @@ func TestSearch_ReturnsMatchingPages(t *testing.T) {
|
||||
results, err := search.Query(dir, "retry transient", 5)
|
||||
require.NoError(t, err)
|
||||
require.Len(t, results, 1)
|
||||
assert.Equal(t, "wiki/concepts/retry-logic.md", results[0].Path)
|
||||
assert.Equal(t, "knowledge/retry-logic.md", results[0].Path)
|
||||
assert.Equal(t, "Retry Logic", results[0].Title)
|
||||
assert.Greater(t, results[0].Score, 0)
|
||||
assert.Contains(t, results[0].Excerpt, "Retry")
|
||||
@@ -40,10 +38,10 @@ func TestSearch_ReturnsMatchingPages(t *testing.T) {
|
||||
|
||||
func TestSearch_RespectsLimit(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
require.NoError(t, os.MkdirAll(filepath.Join(dir, "wiki", "concepts"), 0o755))
|
||||
require.NoError(t, os.MkdirAll(filepath.Join(dir, "knowledge"), 0o755))
|
||||
for i := 0; i < 5; i++ {
|
||||
require.NoError(t, os.WriteFile(
|
||||
filepath.Join(dir, "wiki", "concepts", fmt.Sprintf("page-%d.md", i)),
|
||||
filepath.Join(dir, "knowledge", fmt.Sprintf("page-%d.md", i)),
|
||||
[]byte(fmt.Sprintf("---\ntitle: Page %d\n---\n\nThis page mentions retry.\n", i)),
|
||||
0o644,
|
||||
))
|
||||
|
||||
76
internal/brain/client.go
Normal file
76
internal/brain/client.go
Normal file
@@ -0,0 +1,76 @@
|
||||
// internal/brain/client.go

// Package brain provides a lightweight client for querying the ingestion server.
// Skill handlers call Query before spawning workers to inject relevant knowledge
// from the brain into the task prompt. Errors are suppressed — the brain is
// optional context; its absence must never block a skill invocation.
|
||||
package brain
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// queryResult is one hit in the ingestion server's /query response.
type queryResult struct {
	Path    string `json:"path"`
	Title   string `json:"title"`
	Excerpt string `json:"excerpt"`
	Score   int    `json:"score"`
}

// Query asks the ingestion server at baseURL for pages relevant to query and
// returns them as a markdown section ready to prepend to a worker task prompt.
//
// limit caps the number of results requested; values <= 0 fall back to 3.
// A trailing slash on baseURL is tolerated.
//
// The returned error is always nil. An empty baseURL, a blank query, or an
// empty result set yields "" silently; a request that cannot be built or sent,
// a non-200 status, and an unreadable or undecodable body yield "" after a
// slog warning.
//
// The request is sent with http.DefaultClient, which has no timeout of its
// own; pass a ctx with a deadline to bound the call.
func Query(ctx context.Context, baseURL, query string, limit int) (string, error) {
	if baseURL == "" || strings.TrimSpace(query) == "" {
		return "", nil
	}
	if limit <= 0 {
		limit = 3
	}

	body, err := json.Marshal(map[string]any{"query": query, "limit": limit})
	if err != nil {
		slog.Warn("brain: encode request failed", "err", err)
		return "", nil
	}

	// Normalize so "http://host/" does not produce "http://host//query".
	endpoint := strings.TrimRight(baseURL, "/") + "/query"
	req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewReader(body))
	if err != nil {
		slog.Warn("brain: build request failed", "err", err)
		return "", nil
	}
	req.Header.Set("Content-Type", "application/json")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		slog.Warn("brain: ingestion server unreachable", "err", err)
		return "", nil
	}
	defer func() { _ = resp.Body.Close() }()

	if resp.StatusCode != http.StatusOK {
		slog.Warn("brain: ingestion server returned non-OK", "status", resp.StatusCode)
		return "", nil
	}

	payload, err := io.ReadAll(resp.Body)
	if err != nil {
		slog.Warn("brain: read response failed", "err", err)
		return "", nil
	}
	var decoded struct {
		Results []queryResult `json:"results"`
	}
	if err := json.Unmarshal(payload, &decoded); err != nil {
		slog.Warn("brain: decode response failed", "err", err)
		return "", nil
	}
	if len(decoded.Results) == 0 {
		return "", nil
	}

	var b strings.Builder
	b.WriteString("## Relevant knowledge\n\n")
	for _, r := range decoded.Results {
		// Untitled pages are labelled with their path instead.
		title := r.Title
		if title == "" {
			title = r.Path
		}
		fmt.Fprintf(&b, "### %s\n%s\n\n", title, r.Excerpt)
	}
	return b.String(), nil
}
|
||||
67
internal/brain/client_test.go
Normal file
67
internal/brain/client_test.go
Normal file
@@ -0,0 +1,67 @@
|
||||
package brain_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
|
||||
"github.com/mathiasbq/supervisor/internal/brain"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestQueryEmptyBaseURL(t *testing.T) {
|
||||
result, err := brain.Query(context.Background(), "", "tdd patterns", 3)
|
||||
require.NoError(t, err)
|
||||
assert.Empty(t, result)
|
||||
}
|
||||
|
||||
func TestQueryEmptyQuery(t *testing.T) {
|
||||
result, err := brain.Query(context.Background(), "http://localhost:9999", "", 3)
|
||||
require.NoError(t, err)
|
||||
assert.Empty(t, result)
|
||||
}
|
||||
|
||||
func TestQueryFormatsResults(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
assert.Equal(t, "/query", r.URL.Path)
|
||||
var req map[string]any
|
||||
require.NoError(t, json.NewDecoder(r.Body).Decode(&req))
|
||||
assert.Equal(t, "tdd patterns", req["query"])
|
||||
|
||||
json.NewEncoder(w).Encode(map[string]any{ //nolint:errcheck
|
||||
"results": []map[string]any{
|
||||
{"path": "knowledge/tdd.md", "title": "TDD Guide", "excerpt": "Always write tests first.", "score": 5},
|
||||
{"path": "knowledge/go.md", "title": "Go Conventions", "excerpt": "Use table-driven tests.", "score": 3},
|
||||
},
|
||||
})
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
result, err := brain.Query(context.Background(), srv.URL, "tdd patterns", 3)
|
||||
require.NoError(t, err)
|
||||
assert.Contains(t, result, "## Relevant knowledge")
|
||||
assert.Contains(t, result, "TDD Guide")
|
||||
assert.Contains(t, result, "Always write tests first.")
|
||||
assert.Contains(t, result, "Go Conventions")
|
||||
}
|
||||
|
||||
func TestQueryEmptyResults(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
json.NewEncoder(w).Encode(map[string]any{"results": []any{}}) //nolint:errcheck
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
result, err := brain.Query(context.Background(), srv.URL, "obscure query", 3)
|
||||
require.NoError(t, err)
|
||||
assert.Empty(t, result)
|
||||
}
|
||||
|
||||
func TestQueryUnavailableServerReturnsEmpty(t *testing.T) {
|
||||
// Brain unavailable — should degrade gracefully, no error
|
||||
result, err := brain.Query(context.Background(), "http://127.0.0.1:19999", "query", 3)
|
||||
require.NoError(t, err)
|
||||
assert.Empty(t, result)
|
||||
}
|
||||
@@ -12,8 +12,6 @@ type skillChain struct {
|
||||
}
|
||||
|
||||
type modelsFile struct {
|
||||
Verifier string `yaml:"verifier"`
|
||||
LlamaSwapURL string `yaml:"llama_swap_url"`
|
||||
DefaultChain []string `yaml:"default_chain"`
|
||||
Skills map[string]skillChain `yaml:"skills"`
|
||||
}
|
||||
@@ -34,23 +32,18 @@ func LoadModels(path string) (Models, error) {
|
||||
return Models{data: f}, nil
|
||||
}
|
||||
|
||||
// Verifier returns the model name to use for all local-tier output verification.
|
||||
func (m Models) Verifier() string { return m.data.Verifier }
|
||||
|
||||
// LlamaSwapURL returns the llama-swap base URL for warm-state probing.
|
||||
func (m Models) LlamaSwapURL() string { return m.data.LlamaSwapURL }
|
||||
|
||||
// ChainFor returns the ordered list of model names for a skill.
|
||||
// If override is non-empty, returns a single-entry chain (no escalation).
|
||||
// Falls back to default_chain when the skill has no explicit entry.
|
||||
func (m Models) ChainFor(skill, override string) []string {
|
||||
// ModelFor returns the primary model to use for a skill.
|
||||
// If override is non-empty, it is returned directly.
|
||||
// Falls back to default_chain[0] when the skill has no explicit entry.
|
||||
func (m Models) ModelFor(skill, override string) string {
|
||||
if override != "" {
|
||||
return []string{override}
|
||||
return override
|
||||
}
|
||||
if sc, ok := m.data.Skills[skill]; ok && len(sc.Chain) > 0 {
|
||||
return sc.Chain
|
||||
return sc.Chain[0]
|
||||
}
|
||||
out := make([]string, len(m.data.DefaultChain))
|
||||
copy(out, m.data.DefaultChain)
|
||||
return out
|
||||
if len(m.data.DefaultChain) > 0 {
|
||||
return m.data.DefaultChain[0]
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
@@ -11,9 +11,6 @@ import (
|
||||
)
|
||||
|
||||
const testYAML = `
|
||||
verifier: claude-sonnet-4-6
|
||||
llama_swap_url: http://koala:8080
|
||||
|
||||
default_chain:
|
||||
- ollama/qwen3-coder-30b-tuned
|
||||
- claude-sonnet-4-6
|
||||
@@ -37,44 +34,20 @@ func writeModels(t *testing.T, content string) string {
|
||||
return f
|
||||
}
|
||||
|
||||
func TestModelsVerifier(t *testing.T) {
|
||||
func TestModelsModelForSkillWithEntry(t *testing.T) {
|
||||
m, err := config.LoadModels(writeModels(t, testYAML))
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, "claude-sonnet-4-6", m.Verifier())
|
||||
assert.Equal(t, "ollama/devstral-tuned", m.ModelFor("review", ""))
|
||||
}
|
||||
|
||||
func TestModelsLlamaSwapURL(t *testing.T) {
|
||||
func TestModelsModelForDefaultFallback(t *testing.T) {
|
||||
m, err := config.LoadModels(writeModels(t, testYAML))
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, "http://koala:8080", m.LlamaSwapURL())
|
||||
assert.Equal(t, "ollama/qwen3-coder-30b-tuned", m.ModelFor("trainer", ""))
|
||||
}
|
||||
|
||||
func TestModelsChainForSkillOverride(t *testing.T) {
|
||||
func TestModelsModelForCallerOverride(t *testing.T) {
|
||||
m, err := config.LoadModels(writeModels(t, testYAML))
|
||||
require.NoError(t, err)
|
||||
|
||||
chain := m.ChainFor("review", "")
|
||||
require.Len(t, chain, 3)
|
||||
assert.Equal(t, "ollama/devstral-tuned", chain[0])
|
||||
assert.Equal(t, "ollama/gemma4", chain[1])
|
||||
assert.Equal(t, "claude-sonnet-4-6", chain[2])
|
||||
}
|
||||
|
||||
func TestModelsChainForDefaultFallback(t *testing.T) {
|
||||
m, err := config.LoadModels(writeModels(t, testYAML))
|
||||
require.NoError(t, err)
|
||||
|
||||
chain := m.ChainFor("trainer", "") // not in skills map
|
||||
require.Len(t, chain, 2)
|
||||
assert.Equal(t, "ollama/qwen3-coder-30b-tuned", chain[0])
|
||||
assert.Equal(t, "claude-sonnet-4-6", chain[1])
|
||||
}
|
||||
|
||||
func TestModelsChainForCallerOverride(t *testing.T) {
|
||||
m, err := config.LoadModels(writeModels(t, testYAML))
|
||||
require.NoError(t, err)
|
||||
|
||||
chain := m.ChainFor("review", "claude-opus-4-6")
|
||||
require.Len(t, chain, 1)
|
||||
assert.Equal(t, "claude-opus-4-6", chain[0])
|
||||
assert.Equal(t, "claude-opus-4-6", m.ModelFor("review", "claude-opus-4-6"))
|
||||
}
|
||||
|
||||
@@ -1,111 +0,0 @@
|
||||
package exec
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Config carries the settings an Executor is built with.
type Config struct {
	// ClaudeBinary is the path to the claude binary; defaults to "claude".
	ClaudeBinary string
	// SystemPrompt is the contents of the supervisor CLAUDE.md.
	SystemPrompt string
	// Timeout is the per-invocation timeout; defaults to 120s.
	Timeout time.Duration
	// LiteLLMBaseURL is passed to Claude so it can delegate to Ollama.
	LiteLLMBaseURL string
	// LiteLLMAPIKey is passed to Claude for LiteLLM auth.
	LiteLLMAPIKey string
}
|
||||
|
||||
// Request describes a single supervisor invocation.
type Request struct {
	// SkillPrompt is the skill-specific discipline (e.g. tdd.md contents).
	SkillPrompt string
	// TaskPrompt is the specific task (phase, project_root, spec, model).
	TaskPrompt string
	// Model is the resolved model name, passed in the task prompt.
	Model string
	// Tools is the comma-separated list of allowed tools; defaults to
	// "Bash,Read,Write".
	Tools string
}
|
||||
|
||||
// Executor spawns a claude instance and captures its structured JSON output.
|
||||
type Executor struct {
|
||||
cfg Config
|
||||
}
|
||||
|
||||
func New(cfg Config) *Executor {
|
||||
if cfg.ClaudeBinary == "" {
|
||||
cfg.ClaudeBinary = "claude"
|
||||
}
|
||||
if cfg.Timeout == 0 {
|
||||
cfg.Timeout = 120 * time.Second
|
||||
}
|
||||
return &Executor{cfg: cfg}
|
||||
}
|
||||
|
||||
func (e *Executor) Run(ctx context.Context, req Request) (Result, error) {
|
||||
ctx, cancel := context.WithTimeout(ctx, e.cfg.Timeout)
|
||||
defer cancel()
|
||||
|
||||
tools := req.Tools
|
||||
if tools == "" {
|
||||
tools = "Bash,Read,Write"
|
||||
}
|
||||
|
||||
// Build the full prompt: system rules + skill rules + infra context + task.
|
||||
// LITELLM_API_KEY is injected as a subprocess env var, not in the prompt,
|
||||
// to prevent it appearing in error log output.
|
||||
litellmCtx := fmt.Sprintf("LITELLM_BASE_URL: %s", e.cfg.LiteLLMBaseURL)
|
||||
prompt := strings.Join([]string{
|
||||
e.cfg.SystemPrompt,
|
||||
"---",
|
||||
req.SkillPrompt,
|
||||
"---",
|
||||
litellmCtx,
|
||||
"---",
|
||||
req.TaskPrompt,
|
||||
}, "\n\n")
|
||||
|
||||
args := []string{
|
||||
"--print",
|
||||
"--permission-mode", "bypassPermissions",
|
||||
"--tools", tools,
|
||||
"--json-schema", Schema,
|
||||
"--output-format", "json",
|
||||
}
|
||||
if strings.HasPrefix(req.Model, "claude-") {
|
||||
args = append(args, "--model", req.Model)
|
||||
}
|
||||
args = append(args, prompt)
|
||||
|
||||
cmd := exec.CommandContext(ctx, e.cfg.ClaudeBinary, args...)
|
||||
cmd.Env = append(os.Environ(), "LITELLM_API_KEY="+e.cfg.LiteLLMAPIKey)
|
||||
var stdout, stderr bytes.Buffer
|
||||
cmd.Stdout = &stdout
|
||||
cmd.Stderr = &stderr
|
||||
|
||||
if err := cmd.Run(); err != nil {
|
||||
if ctx.Err() != nil {
|
||||
return Result{}, fmt.Errorf("timeout after %s", e.cfg.Timeout)
|
||||
}
|
||||
return Result{}, fmt.Errorf("claude exited with error: %w — stderr: %s", err, stderr.String())
|
||||
}
|
||||
|
||||
// --output-format json wraps the response in an envelope; structured output
|
||||
// from --json-schema is in the "structured_output" field.
|
||||
var envelope struct {
|
||||
StructuredOutput *Result `json:"structured_output"`
|
||||
IsError bool `json:"is_error"`
|
||||
Result string `json:"result"` // fallback text result for error messages
|
||||
}
|
||||
if err := json.Unmarshal(stdout.Bytes(), &envelope); err != nil {
|
||||
return Result{}, fmt.Errorf("parse envelope JSON: %w — raw: %s — stderr: %s", err, stdout.String(), stderr.String())
|
||||
}
|
||||
if envelope.StructuredOutput == nil {
|
||||
return Result{}, fmt.Errorf("no structured_output in response — result: %s — stderr: %s", envelope.Result, stderr.String())
|
||||
}
|
||||
if err := envelope.StructuredOutput.Validate(); err != nil {
|
||||
return Result{}, fmt.Errorf("invalid result: %w", err)
|
||||
}
|
||||
return *envelope.StructuredOutput, nil
|
||||
}
|
||||
@@ -1,132 +0,0 @@
|
||||
package exec_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
// fakeClaudePath writes a shell script that prints fixed output and returns its path.
|
||||
func fakeClaudePath(t *testing.T, output string, exitCode int) string {
|
||||
t.Helper()
|
||||
dir := t.TempDir()
|
||||
script := filepath.Join(dir, "claude")
|
||||
var content string
|
||||
if exitCode != 0 {
|
||||
content = "#!/bin/sh\necho 'error' >&2\nexit 1\n"
|
||||
} else {
|
||||
content = "#!/bin/sh\necho '" + output + "'\n"
|
||||
}
|
||||
require.NoError(t, os.WriteFile(script, []byte(content), 0755))
|
||||
return script
|
||||
}
|
||||
|
||||
func TestExecutorParsesValidResult(t *testing.T) {
|
||||
// Fake claude emits the --output-format json envelope that the real CLI produces.
|
||||
// The executor extracts the result from the "structured_output" field.
|
||||
envelope := `{"type":"result","subtype":"success","is_error":false,"structured_output":{"status":"pass","phase":"red","skill":"tdd","file_path":"/tmp/x_test.go","runner_output":"FAIL","verified":true,"model_used":"self","message":"ok"}}`
|
||||
claude := fakeClaudePath(t, envelope, 0)
|
||||
|
||||
ex := iexec.New(iexec.Config{
|
||||
ClaudeBinary: claude,
|
||||
SystemPrompt: "you are a supervisor",
|
||||
Timeout: 5 * time.Second,
|
||||
})
|
||||
|
||||
result, err := ex.Run(context.Background(), iexec.Request{
|
||||
SkillPrompt: "tdd rules",
|
||||
TaskPrompt: "run red phase",
|
||||
})
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, "pass", result.Status)
|
||||
assert.True(t, result.Verified)
|
||||
}
|
||||
|
||||
func TestExecutorReturnsErrorOnNonZeroExit(t *testing.T) {
|
||||
claude := fakeClaudePath(t, "", 1)
|
||||
|
||||
ex := iexec.New(iexec.Config{
|
||||
ClaudeBinary: claude,
|
||||
SystemPrompt: "you are a supervisor",
|
||||
Timeout: 5 * time.Second,
|
||||
})
|
||||
|
||||
_, err := ex.Run(context.Background(), iexec.Request{TaskPrompt: "fail"})
|
||||
assert.Error(t, err)
|
||||
}
|
||||
|
||||
func TestExecutorTimesOut(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
script := filepath.Join(dir, "claude")
|
||||
require.NoError(t, os.WriteFile(script, []byte("#!/bin/sh\nsleep 60\n"), 0755))
|
||||
|
||||
ex := iexec.New(iexec.Config{
|
||||
ClaudeBinary: script,
|
||||
SystemPrompt: "you are a supervisor",
|
||||
Timeout: 100 * time.Millisecond,
|
||||
})
|
||||
|
||||
_, err := ex.Run(context.Background(), iexec.Request{TaskPrompt: "slow"})
|
||||
assert.ErrorContains(t, err, "timeout")
|
||||
}
|
||||
|
||||
func TestExecutorPassesModelFlagForCloudModel(t *testing.T) {
|
||||
// The script captures its args to a temp file so we can assert --model was passed.
|
||||
argsFile := filepath.Join(t.TempDir(), "args.txt")
|
||||
envelope := `{"type":"result","subtype":"success","is_error":false,"structured_output":{"status":"pass","phase":"review","skill":"review","file_path":"","runner_output":"","verified":true,"model_used":"claude-sonnet-4-6","message":"ok"}}`
|
||||
|
||||
dir := t.TempDir()
|
||||
script := filepath.Join(dir, "claude")
|
||||
content := "#!/bin/sh\necho \"$@\" > " + argsFile + "\necho '" + envelope + "'\n"
|
||||
require.NoError(t, os.WriteFile(script, []byte(content), 0755))
|
||||
|
||||
ex := iexec.New(iexec.Config{
|
||||
ClaudeBinary: script,
|
||||
SystemPrompt: "sys",
|
||||
Timeout: 5 * time.Second,
|
||||
})
|
||||
|
||||
_, err := ex.Run(context.Background(), iexec.Request{
|
||||
SkillPrompt: "review rules",
|
||||
TaskPrompt: "do review",
|
||||
Model: "claude-sonnet-4-6",
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
argsData, err := os.ReadFile(argsFile)
|
||||
require.NoError(t, err)
|
||||
assert.Contains(t, string(argsData), "--model claude-sonnet-4-6")
|
||||
}
|
||||
|
||||
func TestExecutorSkipsModelFlagForLocalModel(t *testing.T) {
|
||||
argsFile := filepath.Join(t.TempDir(), "args.txt")
|
||||
envelope := `{"type":"result","subtype":"success","is_error":false,"structured_output":{"status":"pass","phase":"review","skill":"review","file_path":"","runner_output":"","verified":true,"model_used":"ollama/devstral","message":"ok"}}`
|
||||
|
||||
dir := t.TempDir()
|
||||
script := filepath.Join(dir, "claude")
|
||||
content := "#!/bin/sh\necho \"$@\" > " + argsFile + "\necho '" + envelope + "'\n"
|
||||
require.NoError(t, os.WriteFile(script, []byte(content), 0755))
|
||||
|
||||
ex := iexec.New(iexec.Config{
|
||||
ClaudeBinary: script,
|
||||
SystemPrompt: "sys",
|
||||
Timeout: 5 * time.Second,
|
||||
})
|
||||
|
||||
_, err := ex.Run(context.Background(), iexec.Request{
|
||||
SkillPrompt: "review rules",
|
||||
TaskPrompt: "do review",
|
||||
Model: "ollama/devstral",
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
argsData, err := os.ReadFile(argsFile)
|
||||
require.NoError(t, err)
|
||||
assert.NotContains(t, string(argsData), "--model")
|
||||
}
|
||||
@@ -6,12 +6,12 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// LiteLLMExecutor calls a LiteLLM-compatible /v1/chat/completions endpoint.
|
||||
// Local models are expected to return a JSON object matching the Result schema
|
||||
// as their response content — no envelope.
|
||||
// LiteLLMExecutor calls a LiteLLM-compatible /v1/chat/completions endpoint
|
||||
// and returns the raw assistant message text.
|
||||
type LiteLLMExecutor struct {
|
||||
baseURL string
|
||||
apiKey string
|
||||
@@ -21,9 +21,12 @@ type LiteLLMExecutor struct {
|
||||
// NewLiteLLM creates a LiteLLMExecutor.
|
||||
// timeout applies to the full HTTP round-trip per call.
|
||||
func NewLiteLLM(baseURL, apiKey string, timeout time.Duration) *LiteLLMExecutor {
|
||||
if timeout == 0 {
|
||||
timeout = 120 * time.Second
|
||||
}
|
||||
return &LiteLLMExecutor{
|
||||
baseURL: baseURL,
|
||||
apiKey: apiKey,
|
||||
baseURL: baseURL,
|
||||
apiKey: apiKey,
|
||||
httpClient: &http.Client{Timeout: timeout},
|
||||
}
|
||||
}
|
||||
@@ -46,58 +49,79 @@ type litellmResponse struct {
|
||||
Choices []litellmChoice `json:"choices"`
|
||||
}
|
||||
|
||||
// Run dispatches req to the LiteLLM server and parses the Result from the
|
||||
// assistant message content. Returns an error on network failure, non-200
|
||||
// status, or unparseable/invalid JSON — all of which the Orchestrator treats
|
||||
// as automatic escalation triggers.
|
||||
func (e *LiteLLMExecutor) Run(ctx context.Context, req Request) (Result, error) {
|
||||
// Complete sends system+user messages to the given model and returns the raw
|
||||
// assistant text along with the round-trip duration in milliseconds.
|
||||
func (e *LiteLLMExecutor) Complete(ctx context.Context, model, system, user string) (string, int64, error) {
|
||||
body := litellmRequest{
|
||||
Model: req.Model,
|
||||
Model: model,
|
||||
Messages: []litellmMessage{
|
||||
{Role: "system", Content: req.SkillPrompt},
|
||||
{Role: "user", Content: req.TaskPrompt},
|
||||
{Role: "system", Content: system},
|
||||
{Role: "user", Content: user},
|
||||
},
|
||||
}
|
||||
|
||||
bodyBytes, err := json.Marshal(body)
|
||||
if err != nil {
|
||||
return Result{}, fmt.Errorf("litellm: marshal request: %w", err)
|
||||
return "", 0, fmt.Errorf("litellm: marshal request: %w", err)
|
||||
}
|
||||
|
||||
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, e.baseURL+"/v1/chat/completions", bytes.NewReader(bodyBytes))
|
||||
if err != nil {
|
||||
return Result{}, fmt.Errorf("litellm: create request: %w", err)
|
||||
return "", 0, fmt.Errorf("litellm: create request: %w", err)
|
||||
}
|
||||
httpReq.Header.Set("Content-Type", "application/json")
|
||||
if e.apiKey != "" {
|
||||
httpReq.Header.Set("Authorization", "Bearer "+e.apiKey)
|
||||
}
|
||||
|
||||
t0 := time.Now()
|
||||
resp, err := e.httpClient.Do(httpReq)
|
||||
if err != nil {
|
||||
return Result{}, fmt.Errorf("litellm: request failed: %w", err)
|
||||
return "", 0, fmt.Errorf("litellm: request failed: %w", err)
|
||||
}
|
||||
defer resp.Body.Close() //nolint:errcheck
|
||||
durationMs := time.Since(t0).Milliseconds()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return Result{}, fmt.Errorf("litellm: server returned status %d", resp.StatusCode)
|
||||
return "", 0, fmt.Errorf("litellm: server returned status %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
var chatResp litellmResponse
|
||||
if err := json.NewDecoder(resp.Body).Decode(&chatResp); err != nil {
|
||||
return Result{}, fmt.Errorf("litellm: decode response: %w", err)
|
||||
return "", 0, fmt.Errorf("litellm: decode response: %w", err)
|
||||
}
|
||||
if len(chatResp.Choices) == 0 {
|
||||
return Result{}, fmt.Errorf("litellm: no choices in response")
|
||||
return "", 0, fmt.Errorf("litellm: no choices in response")
|
||||
}
|
||||
|
||||
content := chatResp.Choices[0].Message.Content
|
||||
var result Result
|
||||
if err := json.Unmarshal([]byte(content), &result); err != nil {
|
||||
return Result{}, fmt.Errorf("litellm: parse result JSON: %w — content: %s", err, content)
|
||||
}
|
||||
if err := result.Validate(); err != nil {
|
||||
return Result{}, fmt.Errorf("litellm: invalid result: %w", err)
|
||||
}
|
||||
return result, nil
|
||||
return stripResultJSON(chatResp.Choices[0].Message.Content), durationMs, nil
|
||||
}
|
||||
|
||||
// stripResultJSON removes a trailing ```json block that matches the old
// structured result schema (containing "status" and "phase" keys). Some local
// models produce correct markdown prose but then append the old JSON format
// out of habit.
//
// The block is stripped only when it is the last thing in text, ignoring
// trailing spaces, tabs and newlines. If the last ```json opener is followed
// by another fence before the end, the text ends with a different block and is
// returned unchanged, so later content is never discarded. Text without a
// matching trailing block is returned unchanged; otherwise the prose before
// the block is returned with trailing whitespace trimmed.
func stripResultJSON(text string) string {
	const (
		openFence  = "```json"
		closeFence = "```"
		blanks     = " \t\r\n"
	)

	trimmed := strings.TrimRight(text, blanks)
	if !strings.HasSuffix(trimmed, closeFence) {
		return text
	}

	// Search for the opener only in the part before the closing fence.
	beforeClose := trimmed[:len(trimmed)-len(closeFence)]
	start := strings.LastIndex(beforeClose, openFence)
	if start < 0 {
		return text
	}

	inner := beforeClose[start+len(openFence):]
	if strings.Contains(inner, closeFence) {
		// The json block was closed earlier; the final fence belongs to
		// some other block that must be kept.
		return text
	}
	if !strings.Contains(inner, `"status"`) || !strings.Contains(inner, `"phase"`) {
		return text
	}
	return strings.TrimRight(text[:start], blanks)
}
|
||||
|
||||
@@ -13,23 +13,11 @@ import (
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func validLiteLLMResult() iexec.Result {
|
||||
return iexec.Result{
|
||||
Status: "pass",
|
||||
Phase: "review",
|
||||
Skill: "review",
|
||||
ModelUsed: "ollama/devstral",
|
||||
Message: "looks good",
|
||||
}
|
||||
}
|
||||
|
||||
func chatResponseFor(t *testing.T, result iexec.Result) []byte {
|
||||
func chatResponse(t *testing.T, content string) []byte {
|
||||
t.Helper()
|
||||
content, err := json.Marshal(result)
|
||||
require.NoError(t, err)
|
||||
resp := map[string]any{
|
||||
"choices": []map[string]any{
|
||||
{"message": map[string]any{"role": "assistant", "content": string(content)}},
|
||||
{"message": map[string]any{"role": "assistant", "content": content}},
|
||||
},
|
||||
}
|
||||
data, err := json.Marshal(resp)
|
||||
@@ -37,25 +25,21 @@ func chatResponseFor(t *testing.T, result iexec.Result) []byte {
|
||||
return data
|
||||
}
|
||||
|
||||
func TestLiteLLMParsesValidResult(t *testing.T) {
|
||||
func TestLiteLLMReturnsText(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
assert.Equal(t, "/v1/chat/completions", r.URL.Path)
|
||||
assert.Equal(t, "application/json", r.Header.Get("Content-Type"))
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
_, _ = w.Write(chatResponseFor(t, validLiteLLMResult()))
|
||||
_, _ = w.Write(chatResponse(t, "here is my analysis"))
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
ex := iexec.NewLiteLLM(srv.URL, "", 5*time.Second)
|
||||
result, err := ex.Run(context.Background(), iexec.Request{
|
||||
SkillPrompt: "review rules",
|
||||
TaskPrompt: "review the code",
|
||||
Model: "ollama/devstral",
|
||||
})
|
||||
text, dur, err := ex.Complete(context.Background(), "ollama/devstral", "system prompt", "user prompt")
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, "pass", result.Status)
|
||||
assert.Equal(t, "review", result.Skill)
|
||||
assert.Equal(t, "here is my analysis", text)
|
||||
assert.GreaterOrEqual(t, dur, int64(0))
|
||||
}
|
||||
|
||||
func TestLiteLLMSendsAuthHeader(t *testing.T) {
|
||||
@@ -63,12 +47,12 @@ func TestLiteLLMSendsAuthHeader(t *testing.T) {
|
||||
assert.Equal(t, "Bearer secret", r.Header.Get("Authorization"))
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
_, _ = w.Write(chatResponseFor(t, validLiteLLMResult()))
|
||||
_, _ = w.Write(chatResponse(t, "ok"))
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
ex := iexec.NewLiteLLM(srv.URL, "secret", 5*time.Second)
|
||||
_, err := ex.Run(context.Background(), iexec.Request{Model: "x", TaskPrompt: "t", SkillPrompt: "s"})
|
||||
_, _, err := ex.Complete(context.Background(), "model", "sys", "user")
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
@@ -79,34 +63,62 @@ func TestLiteLLMErrorOnNonOKStatus(t *testing.T) {
|
||||
defer srv.Close()
|
||||
|
||||
ex := iexec.NewLiteLLM(srv.URL, "", 5*time.Second)
|
||||
_, err := ex.Run(context.Background(), iexec.Request{Model: "x", TaskPrompt: "t"})
|
||||
_, _, err := ex.Complete(context.Background(), "model", "sys", "user")
|
||||
assert.ErrorContains(t, err, "503")
|
||||
}
|
||||
|
||||
func TestLiteLLMErrorOnUnparsableJSON(t *testing.T) {
|
||||
func TestLiteLLMErrorOnEmptyChoices(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
resp := map[string]any{
|
||||
"choices": []map[string]any{
|
||||
{"message": map[string]any{"role": "assistant", "content": "not json at all"}},
|
||||
},
|
||||
}
|
||||
data, _ := json.Marshal(resp)
|
||||
_, _ = w.Write(data)
|
||||
_, _ = w.Write([]byte(`{"choices":[]}`))
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
ex := iexec.NewLiteLLM(srv.URL, "", 5*time.Second)
|
||||
_, err := ex.Run(context.Background(), iexec.Request{Model: "x", TaskPrompt: "t"})
|
||||
assert.Error(t, err)
|
||||
_, _, err := ex.Complete(context.Background(), "model", "sys", "user")
|
||||
assert.ErrorContains(t, err, "no choices")
|
||||
}
|
||||
|
||||
func TestLiteLLMStripsTrailingResultJSON(t *testing.T) {
|
||||
content := "## Hypotheses\n\n**H1 (high):** nil map access.\n\n```json\n{\n \"status\": \"pass\",\n \"phase\": \"debug\",\n \"skill\": \"debug\"\n}\n```"
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
_, _ = w.Write(chatResponse(t, content))
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
ex := iexec.NewLiteLLM(srv.URL, "", 5*time.Second)
|
||||
text, _, err := ex.Complete(context.Background(), "model", "sys", "user")
|
||||
require.NoError(t, err)
|
||||
assert.Contains(t, text, "nil map access")
|
||||
assert.NotContains(t, text, `"status"`)
|
||||
assert.NotContains(t, text, "```json")
|
||||
}
|
||||
|
||||
func TestLiteLLMKeepsNonResultJSONFence(t *testing.T) {
|
||||
// A json block that is part of the actual answer (no status/phase) should be kept.
|
||||
content := "Use this config:\n\n```json\n{\"model\": \"koala/phi4\"}\n```"
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
_, _ = w.Write(chatResponse(t, content))
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
ex := iexec.NewLiteLLM(srv.URL, "", 5*time.Second)
|
||||
text, _, err := ex.Complete(context.Background(), "model", "sys", "user")
|
||||
require.NoError(t, err)
|
||||
assert.Contains(t, text, `"model"`)
|
||||
assert.Contains(t, text, "```json")
|
||||
}
|
||||
|
||||
func TestLiteLLMRespectsContextCancellation(t *testing.T) {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
cancel() // Cancel immediately
|
||||
cancel()
|
||||
|
||||
ex := iexec.NewLiteLLM("http://invalid.example.com", "", 1*time.Second)
|
||||
_, err := ex.Run(ctx, iexec.Request{Model: "x", TaskPrompt: "t"})
|
||||
_, _, err := ex.Complete(ctx, "model", "sys", "user")
|
||||
assert.Error(t, err)
|
||||
}
|
||||
|
||||
@@ -1,197 +0,0 @@
|
||||
package exec
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// ChainEntry describes a single tier of an escalation chain.
type ChainEntry struct {
	Model   string // model identifier, e.g. "ollama/phi4", "claude-sonnet-4-6"
	Tier    string // "local" | "subagent" | "managed"
	IsCloud bool   // true for claude-* models; skips verifier call
}

// EntryFor derives a ChainEntry from a model name. Names starting with
// "claude-" become cloud entries in the "subagent" tier; every other name is a
// non-cloud entry in the "local" tier.
func EntryFor(model string) ChainEntry {
	entry := ChainEntry{Model: model, Tier: "local"}
	if strings.HasPrefix(model, "claude-") {
		entry.Tier = "subagent"
		entry.IsCloud = true
	}
	return entry
}
|
||||
|
||||
// AttemptRecord captures the outcome of one tier attempt for session logging.
// Orchestrator.Run appends one record per chain entry it tries.
type AttemptRecord struct {
	Model      string // model name of the chain entry that was tried
	Tier       string // tier label copied from the chain entry
	DurationMs int64  // milliseconds from the start of the attempt to the end of generation
	WarmStart  bool   // result of the llama-swap warm probe taken before the attempt
	Verdict    string // "accept" | "escalate" | "error"
	Feedback   string // error text or verifier feedback; empty on accept
}
|
||||
|
||||
// VerifierFn is the interface the orchestrator uses to verify local output.
// Orchestrator.Run calls Verify only for non-cloud tiers and treats a returned
// error the same as a rejection (it escalates to the next tier).
type VerifierFn interface {
	// Verify judges output against the skill prompt and task prompt and
	// reports whether it should be accepted.
	Verify(ctx context.Context, skillPrompt, taskPrompt string, output Result) (Verdict, error)
}
|
||||
|
||||
// ExecutorRunFn is the signature of Executor.Run and LiteLLMExecutor.Run.
// The orchestrator takes one such function for local tiers and one for cloud
// tiers.
type ExecutorRunFn func(ctx context.Context, req Request) (Result, error)
|
||||
|
||||
// Orchestrator walks an escalation chain, delegating generation and verification.
// It implements the ExecutorFn shape expected by skill handlers.
type Orchestrator struct {
	chain        []ChainEntry     // tiers, tried in slice order
	localRun     ExecutorRunFn    // for local (non-cloud) tiers; may be nil
	cloudRun     ExecutorRunFn    // for cloud tiers; may be nil
	verifier     VerifierFn       // judges output produced by local tiers
	llamaSwapURL string           // base URL for the warm probe; empty disables probing
	attempts     *[]AttemptRecord // caller-owned attempt log; nil disables recording
}
|
||||
|
||||
// NewOrchestrator creates an Orchestrator.
|
||||
// attempts is a pointer to a slice that will be appended to on each tier attempt.
|
||||
// Pass nil for localRun or cloudRun if no tiers of that type exist in the chain.
|
||||
func NewOrchestrator(
|
||||
chain []ChainEntry,
|
||||
localRun ExecutorRunFn,
|
||||
cloudRun ExecutorRunFn,
|
||||
verifier VerifierFn,
|
||||
llamaSwapURL string,
|
||||
attempts *[]AttemptRecord,
|
||||
) *Orchestrator {
|
||||
return &Orchestrator{
|
||||
chain: chain,
|
||||
localRun: localRun,
|
||||
cloudRun: cloudRun,
|
||||
verifier: verifier,
|
||||
llamaSwapURL: llamaSwapURL,
|
||||
attempts: attempts,
|
||||
}
|
||||
}
|
||||
|
||||
// Run walks the escalation chain and returns the first accepted result.
|
||||
// Satisfies the ExecutorFn signature: func(context.Context, Request) (Result, error).
|
||||
func (o *Orchestrator) Run(ctx context.Context, req Request) (Result, error) {
|
||||
taskPrompt := req.TaskPrompt
|
||||
|
||||
for _, entry := range o.chain {
|
||||
warm := o.probeWarm(entry.Model)
|
||||
start := time.Now()
|
||||
|
||||
tierReq := req
|
||||
tierReq.Model = entry.Model
|
||||
tierReq.TaskPrompt = taskPrompt
|
||||
|
||||
if entry.IsCloud {
|
||||
result, genErr := o.cloudRun(ctx, tierReq)
|
||||
dur := time.Since(start).Milliseconds()
|
||||
verdict := "accept"
|
||||
if genErr != nil {
|
||||
verdict = "error"
|
||||
}
|
||||
o.appendAttempt(AttemptRecord{
|
||||
Model: entry.Model,
|
||||
Tier: entry.Tier,
|
||||
DurationMs: dur,
|
||||
WarmStart: warm,
|
||||
Verdict: verdict,
|
||||
})
|
||||
if genErr == nil {
|
||||
return result, nil
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
// Local tier.
|
||||
result, genErr := o.localRun(ctx, tierReq)
|
||||
dur := time.Since(start).Milliseconds()
|
||||
|
||||
if genErr != nil {
|
||||
o.appendAttempt(AttemptRecord{
|
||||
Model: entry.Model,
|
||||
Tier: entry.Tier,
|
||||
DurationMs: dur,
|
||||
WarmStart: warm,
|
||||
Verdict: "error",
|
||||
Feedback: genErr.Error(),
|
||||
})
|
||||
continue
|
||||
}
|
||||
|
||||
verdict, verErr := o.verifier.Verify(ctx, req.SkillPrompt, taskPrompt, result)
|
||||
if verErr != nil {
|
||||
// Treat verifier failure as escalate (safe default).
|
||||
o.appendAttempt(AttemptRecord{
|
||||
Model: entry.Model,
|
||||
Tier: entry.Tier,
|
||||
DurationMs: dur,
|
||||
WarmStart: warm,
|
||||
Verdict: "escalate",
|
||||
Feedback: "verifier error: " + verErr.Error(),
|
||||
})
|
||||
continue
|
||||
}
|
||||
|
||||
if verdict.Accept {
|
||||
o.appendAttempt(AttemptRecord{
|
||||
Model: entry.Model,
|
||||
Tier: entry.Tier,
|
||||
DurationMs: dur,
|
||||
WarmStart: warm,
|
||||
Verdict: "accept",
|
||||
})
|
||||
return result, nil
|
||||
}
|
||||
|
||||
o.appendAttempt(AttemptRecord{
|
||||
Model: entry.Model,
|
||||
Tier: entry.Tier,
|
||||
DurationMs: dur,
|
||||
WarmStart: warm,
|
||||
Verdict: "escalate",
|
||||
Feedback: verdict.Feedback,
|
||||
})
|
||||
// Inject verifier feedback into the next tier's task prompt.
|
||||
taskPrompt = taskPrompt + "\n\nPrior attempt feedback: " + verdict.Feedback
|
||||
}
|
||||
|
||||
return Result{}, fmt.Errorf("all tiers exhausted after %d attempt(s)", len(o.chain))
|
||||
}
|
||||
|
||||
func (o *Orchestrator) appendAttempt(rec AttemptRecord) {
|
||||
if o.attempts != nil {
|
||||
*o.attempts = append(*o.attempts, rec)
|
||||
}
|
||||
}
|
||||
|
||||
// probeWarm checks whether the model is currently loaded in llama-swap.
|
||||
// Returns false on any error or if llamaSwapURL is empty.
|
||||
func (o *Orchestrator) probeWarm(model string) bool {
|
||||
if o.llamaSwapURL == "" {
|
||||
return false
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond)
|
||||
defer cancel()
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, o.llamaSwapURL+"/v1/models", nil)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
resp, err := http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
defer resp.Body.Close() //nolint:errcheck
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
return strings.Contains(string(body), model)
|
||||
}
|
||||
@@ -1,151 +0,0 @@
|
||||
package exec_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"testing"
|
||||
|
||||
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
// stubRunFn returns preset results sequentially.
|
||||
type stubRunFn struct {
|
||||
calls []stubCall
|
||||
callIdx int
|
||||
}
|
||||
|
||||
type stubCall struct {
|
||||
result iexec.Result
|
||||
err error
|
||||
}
|
||||
|
||||
func (s *stubRunFn) Run(_ context.Context, _ iexec.Request) (iexec.Result, error) {
|
||||
if s.callIdx >= len(s.calls) {
|
||||
return iexec.Result{}, errors.New("unexpected call")
|
||||
}
|
||||
c := s.calls[s.callIdx]
|
||||
s.callIdx++
|
||||
return c.result, c.err
|
||||
}
|
||||
|
||||
// stubVerifier returns preset verdicts sequentially.
|
||||
type stubVerifier struct {
|
||||
verdicts []iexec.Verdict
|
||||
idx int
|
||||
}
|
||||
|
||||
func (s *stubVerifier) Verify(_ context.Context, _, _ string, _ iexec.Result) (iexec.Verdict, error) {
|
||||
if s.idx >= len(s.verdicts) {
|
||||
return iexec.Verdict{}, errors.New("unexpected verify call")
|
||||
}
|
||||
v := s.verdicts[s.idx]
|
||||
s.idx++
|
||||
return v, nil
|
||||
}
|
||||
|
||||
func okResult(skill string) iexec.Result {
|
||||
return iexec.Result{Status: "pass", Phase: "review", Skill: skill, Message: "ok", ModelUsed: "m"}
|
||||
}
|
||||
|
||||
func TestOrchestratorSingleLocalAccept(t *testing.T) {
|
||||
local := &stubRunFn{calls: []stubCall{{result: okResult("review")}}}
|
||||
verifier := &stubVerifier{verdicts: []iexec.Verdict{{Accept: true}}}
|
||||
|
||||
var attempts []iexec.AttemptRecord
|
||||
orch := iexec.NewOrchestrator(
|
||||
[]iexec.ChainEntry{{Model: "ollama/devstral", Tier: "local", IsCloud: false}},
|
||||
local.Run, nil, verifier, "", &attempts,
|
||||
)
|
||||
|
||||
result, err := orch.Run(context.Background(), iexec.Request{TaskPrompt: "review"})
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, "pass", result.Status)
|
||||
require.Len(t, attempts, 1)
|
||||
assert.Equal(t, "local", attempts[0].Tier)
|
||||
assert.Equal(t, "accept", attempts[0].Verdict)
|
||||
}
|
||||
|
||||
func TestOrchestratorEscalatesOnVerifierReject(t *testing.T) {
|
||||
local := &stubRunFn{calls: []stubCall{
|
||||
{result: iexec.Result{Status: "fail", Phase: "review", Skill: "review", Message: "weak"}},
|
||||
{result: okResult("review")},
|
||||
}}
|
||||
verifier := &stubVerifier{verdicts: []iexec.Verdict{
|
||||
{Accept: false, Feedback: "missing line refs"},
|
||||
{Accept: true},
|
||||
}}
|
||||
|
||||
var attempts []iexec.AttemptRecord
|
||||
orch := iexec.NewOrchestrator(
|
||||
[]iexec.ChainEntry{
|
||||
{Model: "ollama/devstral", Tier: "local", IsCloud: false},
|
||||
{Model: "ollama/gemma4", Tier: "local", IsCloud: false},
|
||||
},
|
||||
local.Run, nil, verifier, "", &attempts,
|
||||
)
|
||||
|
||||
result, err := orch.Run(context.Background(), iexec.Request{TaskPrompt: "review"})
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, "pass", result.Status)
|
||||
require.Len(t, attempts, 2)
|
||||
assert.Equal(t, "escalate", attempts[0].Verdict)
|
||||
assert.Equal(t, "missing line refs", attempts[0].Feedback)
|
||||
assert.Equal(t, "accept", attempts[1].Verdict)
|
||||
}
|
||||
|
||||
func TestOrchestratorEscalatesOnLocalError(t *testing.T) {
|
||||
local := &stubRunFn{calls: []stubCall{
|
||||
{err: errors.New("network failure")},
|
||||
{result: okResult("review")},
|
||||
}}
|
||||
verifier := &stubVerifier{verdicts: []iexec.Verdict{{Accept: true}}}
|
||||
|
||||
var attempts []iexec.AttemptRecord
|
||||
orch := iexec.NewOrchestrator(
|
||||
[]iexec.ChainEntry{
|
||||
{Model: "ollama/devstral", Tier: "local", IsCloud: false},
|
||||
{Model: "ollama/gemma4", Tier: "local", IsCloud: false},
|
||||
},
|
||||
local.Run, nil, verifier, "", &attempts,
|
||||
)
|
||||
|
||||
_, err := orch.Run(context.Background(), iexec.Request{TaskPrompt: "review"})
|
||||
require.NoError(t, err)
|
||||
require.Len(t, attempts, 2)
|
||||
assert.Equal(t, "error", attempts[0].Verdict)
|
||||
assert.Equal(t, "accept", attempts[1].Verdict)
|
||||
}
|
||||
|
||||
func TestOrchestratorCloudTierSelfCertifies(t *testing.T) {
|
||||
cloud := &stubRunFn{calls: []stubCall{{result: okResult("review")}}}
|
||||
verifier := &stubVerifier{} // no verdicts — must not be called
|
||||
|
||||
var attempts []iexec.AttemptRecord
|
||||
orch := iexec.NewOrchestrator(
|
||||
[]iexec.ChainEntry{{Model: "claude-sonnet-4-6", Tier: "subagent", IsCloud: true}},
|
||||
nil, cloud.Run, verifier, "", &attempts,
|
||||
)
|
||||
|
||||
result, err := orch.Run(context.Background(), iexec.Request{TaskPrompt: "review"})
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, "pass", result.Status)
|
||||
require.Len(t, attempts, 1)
|
||||
assert.Equal(t, "subagent", attempts[0].Tier)
|
||||
assert.Equal(t, "accept", attempts[0].Verdict)
|
||||
assert.Equal(t, 0, verifier.idx) // verifier never called
|
||||
}
|
||||
|
||||
func TestOrchestratorAllTiersExhausted(t *testing.T) {
|
||||
local := &stubRunFn{calls: []stubCall{{err: errors.New("unavailable")}}}
|
||||
|
||||
var attempts []iexec.AttemptRecord
|
||||
orch := iexec.NewOrchestrator(
|
||||
[]iexec.ChainEntry{{Model: "ollama/devstral", Tier: "local", IsCloud: false}},
|
||||
local.Run, nil, &stubVerifier{}, "", &attempts,
|
||||
)
|
||||
|
||||
_, err := orch.Run(context.Background(), iexec.Request{TaskPrompt: "review"})
|
||||
assert.ErrorContains(t, err, "all tiers exhausted")
|
||||
}
|
||||
@@ -1,65 +0,0 @@
|
||||
package exec
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Result is the structured JSON output from every supervisor invocation.
// The JSON schema constant is passed to claude via --json-schema so Claude
// validates its own output before returning.
type Result struct {
	Status       string `json:"status"`        // pass | fail | error
	Phase        string `json:"phase"`         // red | green | refactor | retrospective | review | debug | spec | trainer
	Skill        string `json:"skill"`         // tdd | review | ...
	FilePath     string `json:"file_path"`     // absolute path to generated file
	RunnerOutput string `json:"runner_output"` // raw stdout+stderr from test runner
	Verified     bool   `json:"verified"`      // based on exit code, never self-report
	ModelUsed    string `json:"model_used"`    // model name or "self"
	Message      string `json:"message"`       // one sentence summary
}

// Lookup tables of the values Validate accepts for Status and Phase.
var (
	validStatuses = map[string]bool{"pass": true, "fail": true, "error": true}
	validPhases   = map[string]bool{
		"red":           true,
		"green":         true,
		"refactor":      true,
		"retrospective": true,
		"review":        true,
		"debug":         true,
		"spec":          true,
		"trainer":       true,
	}
)

// Validate checks that Status and Phase hold one of the known values and that
// Skill is non-empty. All violations are collected and returned as a single
// error joined with "; "; nil means the result is well-formed.
func (r Result) Validate() error {
	problems := make([]string, 0, 3)

	if known := validStatuses[r.Status]; !known {
		problems = append(problems, "status must be pass|fail|error, got: "+r.Status)
	}
	if known := validPhases[r.Phase]; !known {
		problems = append(problems, "phase must be one of red|green|refactor|retrospective|review|debug|spec|trainer, got: "+r.Phase)
	}
	if len(r.Skill) == 0 {
		problems = append(problems, "skill is required")
	}

	if len(problems) == 0 {
		return nil
	}
	return errors.New(strings.Join(problems, "; "))
}
|
||||
|
||||
// Schema is passed to claude --json-schema to enforce structured output.
//
// The status and phase enums mirror the values Result.Validate accepts, so a
// document that satisfies the schema also passes validation of those fields.
// Keep the three in sync when adding a status or phase.
const Schema = `{
  "type": "object",
  "required": ["status","phase","skill","file_path","runner_output","verified","model_used","message"],
  "properties": {
    "status": {"type": "string", "enum": ["pass","fail","error"]},
    "phase": {"type": "string", "enum": ["red","green","refactor","retrospective","review","debug","spec","trainer"]},
    "skill": {"type": "string"},
    "file_path": {"type": "string"},
    "runner_output": {"type": "string"},
    "verified": {"type": "boolean"},
    "model_used": {"type": "string"},
    "message": {"type": "string"}
  }
}`
|
||||
@@ -1,79 +0,0 @@
|
||||
package exec_test
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"testing"
|
||||
|
||||
"github.com/mathiasbq/supervisor/internal/exec"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestResultParsesValidJSON(t *testing.T) {
|
||||
raw := `{
|
||||
"status": "pass",
|
||||
"phase": "red",
|
||||
"skill": "tdd",
|
||||
"file_path": "/tmp/foo_test.go",
|
||||
"runner_output": "--- FAIL: TestFoo",
|
||||
"verified": true,
|
||||
"model_used": "self",
|
||||
"message": "test fails as expected"
|
||||
}`
|
||||
var r exec.Result
|
||||
require.NoError(t, json.Unmarshal([]byte(raw), &r))
|
||||
assert.Equal(t, "pass", r.Status)
|
||||
assert.Equal(t, "red", r.Phase)
|
||||
assert.True(t, r.Verified)
|
||||
}
|
||||
|
||||
func TestResultValidation(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
result exec.Result
|
||||
wantErr bool
|
||||
}{
|
||||
{
|
||||
name: "valid pass result",
|
||||
result: exec.Result{
|
||||
Status: "pass", Phase: "red", Skill: "tdd",
|
||||
FilePath: "/tmp/x_test.go", RunnerOutput: "FAIL",
|
||||
Verified: true, ModelUsed: "self", Message: "ok",
|
||||
},
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "empty status",
|
||||
result: exec.Result{Phase: "red", Skill: "tdd"},
|
||||
wantErr: true,
|
||||
},
|
||||
{
|
||||
name: "invalid status",
|
||||
result: exec.Result{Status: "unknown", Phase: "red", Skill: "tdd"},
|
||||
wantErr: true,
|
||||
},
|
||||
{
|
||||
name: "invalid phase",
|
||||
result: exec.Result{Status: "pass", Phase: "bad", Skill: "tdd"},
|
||||
wantErr: true,
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
err := tt.result.Validate()
|
||||
if tt.wantErr {
|
||||
assert.Error(t, err)
|
||||
} else {
|
||||
assert.NoError(t, err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidateAcceptsAllPhases(t *testing.T) {
|
||||
phases := []string{"red", "green", "refactor", "retrospective", "review", "debug", "spec", "trainer"}
|
||||
for _, phase := range phases {
|
||||
r := exec.Result{Status: "pass", Phase: phase, Skill: "test", ModelUsed: "self", Message: "ok"}
|
||||
assert.NoError(t, r.Validate(), "phase %q should be valid", phase)
|
||||
}
|
||||
}
|
||||
@@ -1,99 +0,0 @@
|
||||
package exec
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Verdict is the output of a Claude verification call. It is decoded from the
// JSON object the verifier prompt asks Claude to print.
type Verdict struct {
	Accept   bool   `json:"accept"`   // true when the output should be accepted as-is
	Feedback string `json:"feedback"` // empty when Accept is true
}
|
||||
|
||||
// Verifier runs a focused Claude call to judge local model output.
type Verifier struct {
	claudeBinary string        // path or name of the claude executable
	model        string        // passed as --model when non-empty
	timeout      time.Duration // upper bound for one Verify call
}

// NewVerifier creates a Verifier that calls claude with the given binary path and model.
// Empty claudeBinary defaults to "claude". A zero or negative timeout defaults
// to 30s; a negative value would otherwise make every Verify call expire
// before the subprocess starts.
func NewVerifier(claudeBinary, model string, timeout time.Duration) *Verifier {
	if claudeBinary == "" {
		claudeBinary = "claude"
	}
	if timeout <= 0 {
		timeout = 30 * time.Second
	}
	return &Verifier{
		claudeBinary: claudeBinary,
		model:        model,
		timeout:      timeout,
	}
}
|
||||
|
||||
// Verify asks Claude whether output satisfies the skill discipline's iron laws.
|
||||
// Returns Verdict{Accept: true} to accept or Verdict{Accept: false, Feedback: "..."}
|
||||
// to escalate. Returns an error on subprocess failure or unparseable response.
|
||||
func (v *Verifier) Verify(ctx context.Context, skillPrompt, taskPrompt string, output Result) (Verdict, error) {
|
||||
ctx, cancel := context.WithTimeout(ctx, v.timeout)
|
||||
defer cancel()
|
||||
|
||||
outputJSON, err := json.Marshal(output)
|
||||
if err != nil {
|
||||
return Verdict{}, fmt.Errorf("verifier: marshal output: %w", err)
|
||||
}
|
||||
|
||||
prompt := fmt.Sprintf(`You are a quality verifier for an AI supervisor system.
|
||||
|
||||
Given the skill discipline, the original task, and the generated output, decide whether the output satisfies the discipline's iron laws and output contract.
|
||||
|
||||
Reply with JSON only — no other text:
|
||||
{"accept": true, "feedback": ""}
|
||||
or
|
||||
{"accept": false, "feedback": "<one sentence reason>"}
|
||||
|
||||
## Skill discipline
|
||||
%s
|
||||
|
||||
## Original task
|
||||
%s
|
||||
|
||||
## Generated output
|
||||
%s`, skillPrompt, taskPrompt, string(outputJSON))
|
||||
|
||||
args := []string{
|
||||
"--print",
|
||||
"--permission-mode", "bypassPermissions",
|
||||
}
|
||||
if v.model != "" {
|
||||
args = append(args, "--model", v.model)
|
||||
}
|
||||
args = append(args, prompt)
|
||||
|
||||
cmd := exec.CommandContext(ctx, v.claudeBinary, args...)
|
||||
cmd.Env = os.Environ()
|
||||
var stdout, stderr bytes.Buffer
|
||||
cmd.Stdout = &stdout
|
||||
cmd.Stderr = &stderr
|
||||
|
||||
if err := cmd.Run(); err != nil {
|
||||
if ctx.Err() != nil {
|
||||
return Verdict{}, fmt.Errorf("verifier: timeout after %s", v.timeout)
|
||||
}
|
||||
return Verdict{}, fmt.Errorf("verifier: claude exited with error: %w — stderr: %s", err, stderr.String())
|
||||
}
|
||||
|
||||
var verdict Verdict
|
||||
if err := json.Unmarshal(bytes.TrimSpace(stdout.Bytes()), &verdict); err != nil {
|
||||
return Verdict{}, fmt.Errorf("verifier: parse verdict JSON: %w — raw: %s", err, stdout.String())
|
||||
}
|
||||
return verdict, nil
|
||||
}
|
||||
@@ -1,74 +0,0 @@
|
||||
package exec_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func fakeVerifierClaude(t *testing.T, verdict iexec.Verdict) string {
|
||||
t.Helper()
|
||||
data, err := json.Marshal(verdict)
|
||||
require.NoError(t, err)
|
||||
dir := t.TempDir()
|
||||
script := filepath.Join(dir, "claude")
|
||||
content := fmt.Sprintf("#!/bin/sh\necho '%s'\n", string(data))
|
||||
require.NoError(t, os.WriteFile(script, []byte(content), 0755))
|
||||
return script
|
||||
}
|
||||
|
||||
func TestVerifierAccepts(t *testing.T) {
|
||||
claude := fakeVerifierClaude(t, iexec.Verdict{Accept: true, Feedback: ""})
|
||||
v := iexec.NewVerifier(claude, "claude-sonnet-4-6", 5*time.Second)
|
||||
|
||||
verdict, err := v.Verify(context.Background(), "skill rules", "do the task", iexec.Result{
|
||||
Status: "pass", Phase: "review", Skill: "review", Message: "ok",
|
||||
})
|
||||
require.NoError(t, err)
|
||||
assert.True(t, verdict.Accept)
|
||||
assert.Empty(t, verdict.Feedback)
|
||||
}
|
||||
|
||||
func TestVerifierEscalates(t *testing.T) {
|
||||
claude := fakeVerifierClaude(t, iexec.Verdict{Accept: false, Feedback: "missing line references"})
|
||||
v := iexec.NewVerifier(claude, "claude-sonnet-4-6", 5*time.Second)
|
||||
|
||||
verdict, err := v.Verify(context.Background(), "skill rules", "do the task", iexec.Result{
|
||||
Status: "pass", Phase: "review", Skill: "review", Message: "incomplete",
|
||||
})
|
||||
require.NoError(t, err)
|
||||
assert.False(t, verdict.Accept)
|
||||
assert.Equal(t, "missing line references", verdict.Feedback)
|
||||
}
|
||||
|
||||
func TestVerifierErrorOnUnparsableOutput(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
script := filepath.Join(dir, "claude")
|
||||
require.NoError(t, os.WriteFile(script, []byte("#!/bin/sh\necho 'not json'\n"), 0755))
|
||||
|
||||
v := iexec.NewVerifier(script, "claude-sonnet-4-6", 5*time.Second)
|
||||
_, err := v.Verify(context.Background(), "rules", "task", iexec.Result{
|
||||
Status: "pass", Phase: "review", Skill: "review", Message: "ok",
|
||||
})
|
||||
assert.Error(t, err)
|
||||
}
|
||||
|
||||
func TestVerifierErrorOnNonZeroExit(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
script := filepath.Join(dir, "claude")
|
||||
require.NoError(t, os.WriteFile(script, []byte("#!/bin/sh\nexit 1\n"), 0755))
|
||||
|
||||
v := iexec.NewVerifier(script, "claude-sonnet-4-6", 5*time.Second)
|
||||
_, err := v.Verify(context.Background(), "rules", "task", iexec.Result{
|
||||
Status: "pass", Phase: "review", Skill: "review", Message: "ok",
|
||||
})
|
||||
assert.Error(t, err)
|
||||
}
|
||||
@@ -36,3 +36,21 @@ func FormatHistory(entries []Entry, excludePhase string) string {
|
||||
}
|
||||
return b.String()
|
||||
}
|
||||
|
||||
// PrependHistory reads the session log for sessionID and prepends a formatted
|
||||
// history block to task. Returns task unchanged if sessionID or sessionsDir is
|
||||
// empty, or if no prior entries exist.
|
||||
func PrependHistory(sessionsDir, sessionID, currentPhase, task string) string {
|
||||
if sessionID == "" || sessionsDir == "" {
|
||||
return task
|
||||
}
|
||||
entries, err := Read(sessionsDir, sessionID)
|
||||
if err != nil || len(entries) == 0 {
|
||||
return task
|
||||
}
|
||||
history := FormatHistory(entries, currentPhase)
|
||||
if history == "" {
|
||||
return task
|
||||
}
|
||||
return history + "\n---\n\n" + task
|
||||
}
|
||||
|
||||
@@ -2,11 +2,13 @@
|
||||
package session_test
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/mathiasbq/supervisor/internal/session"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestFormatHistoryEmpty(t *testing.T) {
|
||||
@@ -39,3 +41,45 @@ func TestFormatHistoryExcludesCurrentPhase(t *testing.T) {
|
||||
assert.Contains(t, result, "red done")
|
||||
assert.NotContains(t, result, "green done")
|
||||
}
|
||||
|
||||
func TestPrependHistoryNoSessionID(t *testing.T) {
|
||||
result := session.PrependHistory("", "", "review", "do the task")
|
||||
assert.Equal(t, "do the task", result)
|
||||
}
|
||||
|
||||
func TestPrependHistoryNoLog(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
result := session.PrependHistory(dir, "sess-abc", "review", "do the task")
|
||||
assert.Equal(t, "do the task", result)
|
||||
}
|
||||
|
||||
func TestPrependHistoryPrependsHistory(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
entry := session.Entry{
|
||||
SessionID: "sess-abc", Skill: "tdd", Phase: "red",
|
||||
FinalStatus: "pass", Message: "wrote test",
|
||||
Timestamp: time.Now(),
|
||||
}
|
||||
require.NoError(t, session.Append(dir, "sess-abc", entry))
|
||||
|
||||
result := session.PrependHistory(dir, "sess-abc", "review", "do the task")
|
||||
assert.Contains(t, result, "## Session history")
|
||||
assert.Contains(t, result, "wrote test")
|
||||
assert.True(t, strings.HasSuffix(result, "do the task"))
|
||||
}
|
||||
|
||||
func TestPrependHistoryExcludesCurrentPhase(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
require.NoError(t, session.Append(dir, "sess-abc", session.Entry{
|
||||
SessionID: "sess-abc", Skill: "tdd", Phase: "red",
|
||||
FinalStatus: "pass", Message: "red done", Timestamp: time.Now(),
|
||||
}))
|
||||
require.NoError(t, session.Append(dir, "sess-abc", session.Entry{
|
||||
SessionID: "sess-abc", Skill: "tdd", Phase: "green",
|
||||
FinalStatus: "pass", Message: "green done", Timestamp: time.Now(),
|
||||
}))
|
||||
|
||||
result := session.PrependHistory(dir, "sess-abc", "green", "do the task")
|
||||
assert.Contains(t, result, "red done")
|
||||
assert.NotContains(t, result, "green done")
|
||||
}
|
||||
|
||||
@@ -5,8 +5,9 @@ import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
||||
"github.com/mathiasbq/supervisor/internal/brain"
|
||||
"github.com/mathiasbq/supervisor/internal/session"
|
||||
)
|
||||
|
||||
@@ -39,42 +40,43 @@ func (s *Skill) Handle(ctx context.Context, tool string, args json.RawMessage) (
|
||||
model = s.cfg.DefaultModel
|
||||
}
|
||||
|
||||
brainCtx, _ := brain.Query(ctx, s.cfg.IngestBaseURL, a.Error+" "+a.Context, 3)
|
||||
|
||||
task := fmt.Sprintf(
|
||||
"phase: debug\nproject_root: %s\nerror: %s\ncontext: %s\nmodel: %s",
|
||||
a.ProjectRoot, a.Error, a.Context, model,
|
||||
)
|
||||
task = s.prependHistory(a.SessionID, "debug", task)
|
||||
task = session.PrependHistory(s.cfg.SessionsDir, a.SessionID, "debug", task)
|
||||
if brainCtx != "" {
|
||||
task = brainCtx + "\n---\n\n" + task
|
||||
}
|
||||
|
||||
if s.cfg.ExecutorFn == nil {
|
||||
if s.cfg.CompleteFunc == nil {
|
||||
return nil, fmt.Errorf("no executor configured")
|
||||
}
|
||||
result, err := s.cfg.ExecutorFn(ctx, iexec.Request{
|
||||
SkillPrompt: s.cfg.SkillPrompt,
|
||||
TaskPrompt: task,
|
||||
Model: model,
|
||||
Tools: "Read,Bash",
|
||||
})
|
||||
t0 := time.Now()
|
||||
text, dur, err := s.cfg.CompleteFunc(ctx, model, s.cfg.SkillPrompt, task)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
b, err := json.Marshal(result)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("marshal result: %w", err)
|
||||
}
|
||||
return b, nil
|
||||
}
|
||||
|
||||
func (s *Skill) prependHistory(sessionID, currentPhase, task string) string {
|
||||
if sessionID == "" || s.cfg.SessionsDir == "" {
|
||||
return task
|
||||
if a.SessionID != "" && s.cfg.SessionsDir != "" {
|
||||
msg := text
|
||||
if len(msg) > 200 {
|
||||
msg = msg[:200]
|
||||
}
|
||||
_ = session.Append(s.cfg.SessionsDir, a.SessionID, session.Entry{
|
||||
SessionID: a.SessionID,
|
||||
Timestamp: time.Now(),
|
||||
Skill: "debug",
|
||||
Phase: "debug",
|
||||
ProjectRoot: a.ProjectRoot,
|
||||
FinalStatus: "ok",
|
||||
ModelUsed: model,
|
||||
DurationMs: time.Since(t0).Milliseconds(),
|
||||
Message: msg,
|
||||
})
|
||||
}
|
||||
entries, err := session.Read(s.cfg.SessionsDir, sessionID)
|
||||
if err != nil || len(entries) == 0 {
|
||||
return task
|
||||
}
|
||||
history := session.FormatHistory(entries, currentPhase)
|
||||
if history == "" {
|
||||
return task
|
||||
}
|
||||
return history + "\n---\n\n" + task
|
||||
|
||||
return json.Marshal(map[string]any{"text": text, "model": model, "duration_ms": dur})
|
||||
}
|
||||
|
||||
@@ -6,7 +6,6 @@ import (
|
||||
"encoding/json"
|
||||
"testing"
|
||||
|
||||
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
||||
"github.com/mathiasbq/supervisor/internal/skills/debug"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
@@ -33,29 +32,22 @@ func TestDebugRequiresError(t *testing.T) {
|
||||
assert.ErrorContains(t, err, "error")
|
||||
}
|
||||
|
||||
func TestDebugCallsExecutor(t *testing.T) {
|
||||
called := false
|
||||
func TestDebugCallsCompleteFunc(t *testing.T) {
|
||||
var capturedTask string
|
||||
fakeFn := func(_ context.Context, req iexec.Request) (iexec.Result, error) {
|
||||
called = true
|
||||
capturedTask = req.TaskPrompt
|
||||
return iexec.Result{
|
||||
Status: "pass", Phase: "debug", Skill: "debug",
|
||||
RunnerOutput: "HYPOTHESIS 1 (likelihood: high): nil map access\nVERIFY: go test ./... → expected: panic line reference",
|
||||
Verified: false, ModelUsed: "self", Message: "3 hypotheses for: panic nil pointer at foo.go:42",
|
||||
}, nil
|
||||
fakeFn := func(_ context.Context, _, _, user string) (string, int64, error) {
|
||||
capturedTask = user
|
||||
return "HYPOTHESIS 1 (high): nil map access. Verify: go test ./...", 90, nil
|
||||
}
|
||||
|
||||
sk := debug.New(debug.Config{SkillPrompt: "debug rules", ExecutorFn: fakeFn, SessionsDir: t.TempDir()})
|
||||
sk := debug.New(debug.Config{SkillPrompt: "debug rules", CompleteFunc: fakeFn, SessionsDir: t.TempDir()})
|
||||
out, err := sk.Handle(context.Background(), "debug", json.RawMessage(
|
||||
`{"project_root":"/tmp/proj","error":"panic: nil pointer dereference at foo.go:42","context":"occurs on startup"}`,
|
||||
))
|
||||
require.NoError(t, err)
|
||||
assert.True(t, called)
|
||||
assert.Contains(t, capturedTask, "panic: nil pointer dereference")
|
||||
assert.Contains(t, capturedTask, "occurs on startup")
|
||||
|
||||
var result iexec.Result
|
||||
var result map[string]any
|
||||
require.NoError(t, json.Unmarshal(out, &result))
|
||||
assert.Equal(t, "debug", result.Phase)
|
||||
assert.Contains(t, result["text"], "nil map access")
|
||||
}
|
||||
|
||||
@@ -5,19 +5,19 @@ import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
|
||||
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
||||
"github.com/mathiasbq/supervisor/internal/registry"
|
||||
)
|
||||
|
||||
// ExecutorFn is the function signature for running a worker subprocess.
|
||||
type ExecutorFn func(ctx context.Context, req iexec.Request) (iexec.Result, error)
|
||||
// CompleteFunc is the function used to call a local model.
|
||||
type CompleteFunc func(ctx context.Context, model, system, user string) (string, int64, error)
|
||||
|
||||
// Config holds dependencies for the debug skill.
|
||||
type Config struct {
|
||||
SkillPrompt string
|
||||
DefaultModel string
|
||||
ExecutorFn ExecutorFn
|
||||
SessionsDir string
|
||||
SkillPrompt string
|
||||
DefaultModel string
|
||||
CompleteFunc CompleteFunc
|
||||
SessionsDir string
|
||||
IngestBaseURL string
|
||||
}
|
||||
|
||||
// Skill implements the debug MCP tool.
|
||||
@@ -39,7 +39,7 @@ func (s *Skill) Tools() []registry.ToolDef {
|
||||
return []registry.ToolDef{
|
||||
{
|
||||
Name: "debug",
|
||||
Description: "Analyse an error and return 3-5 hypotheses ordered by likelihood, each with a concrete verification step.",
|
||||
Description: "Consult a local model to analyse an error and return hypotheses ordered by likelihood, each with a concrete verification step.",
|
||||
InputSchema: schema(
|
||||
[]string{"project_root", "error"},
|
||||
map[string]any{
|
||||
|
||||
@@ -5,8 +5,8 @@ import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
||||
"github.com/mathiasbq/supervisor/internal/session"
|
||||
)
|
||||
|
||||
@@ -33,7 +33,6 @@ func (s *Skill) Handle(ctx context.Context, tool string, args json.RawMessage) (
|
||||
model = s.cfg.DefaultModel
|
||||
}
|
||||
|
||||
// Read session log entries (empty slice if no log exists yet).
|
||||
entries, err := session.Read(s.cfg.SessionsDir, a.SessionID)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("read session log: %w", err)
|
||||
@@ -45,26 +44,33 @@ func (s *Skill) Handle(ctx context.Context, tool string, args json.RawMessage) (
|
||||
}
|
||||
|
||||
taskPrompt := fmt.Sprintf(
|
||||
"SESSION_ID: %s\n\nSESSION_LOG:\n%s\n\nReview this session log. Identify what is novel or worth preserving as organizational knowledge. Write structured entries to brain/raw/ via brain_write. Return JSON result when done.",
|
||||
"SESSION_ID: %s\n\nSESSION_LOG:\n%s\n\nReview this session log. Identify what is novel or worth preserving as organizational knowledge. Provide structured insights.",
|
||||
a.SessionID, string(logJSON),
|
||||
)
|
||||
|
||||
if s.cfg.ExecutorFn == nil {
|
||||
if s.cfg.CompleteFunc == nil {
|
||||
return nil, fmt.Errorf("no executor configured")
|
||||
}
|
||||
result, err := s.cfg.ExecutorFn(ctx, iexec.Request{
|
||||
SkillPrompt: s.cfg.SkillPrompt,
|
||||
TaskPrompt: taskPrompt,
|
||||
Model: model,
|
||||
Tools: "Bash,Read,Write",
|
||||
})
|
||||
t0 := time.Now()
|
||||
text, dur, err := s.cfg.CompleteFunc(ctx, model, s.cfg.SkillPrompt, taskPrompt)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("retrospective worker: %w", err)
|
||||
return nil, fmt.Errorf("retrospective model: %w", err)
|
||||
}
|
||||
|
||||
b, err := json.Marshal(result)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("marshal result: %w", err)
|
||||
msg := text
|
||||
if len(msg) > 200 {
|
||||
msg = msg[:200]
|
||||
}
|
||||
return b, nil
|
||||
_ = session.Append(s.cfg.SessionsDir, a.SessionID, session.Entry{
|
||||
SessionID: a.SessionID,
|
||||
Timestamp: time.Now(),
|
||||
Skill: "retrospective",
|
||||
Phase: "retrospective",
|
||||
FinalStatus: "ok",
|
||||
ModelUsed: model,
|
||||
DurationMs: time.Since(t0).Milliseconds(),
|
||||
Message: msg,
|
||||
})
|
||||
|
||||
return json.Marshal(map[string]any{"text": text, "model": model, "duration_ms": dur})
|
||||
}
|
||||
|
||||
@@ -6,7 +6,6 @@ import (
|
||||
"encoding/json"
|
||||
"testing"
|
||||
|
||||
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
||||
"github.com/mathiasbq/supervisor/internal/skills/retrospective"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
@@ -20,20 +19,14 @@ func TestHandle_Retrospective_RequiresSessionID(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestHandle_Retrospective_BuildsPromptWithSessionLog(t *testing.T) {
|
||||
var capturedReq iexec.Request
|
||||
var capturedTask string
|
||||
s := retrospective.New(retrospective.Config{
|
||||
SkillPrompt: "retrospective discipline",
|
||||
DefaultModel: "ollama/test",
|
||||
SessionsDir: t.TempDir(), // empty dir, no session file — that's OK, session.Read returns nil
|
||||
ExecutorFn: func(_ context.Context, req iexec.Request) (iexec.Result, error) {
|
||||
capturedReq = req
|
||||
return iexec.Result{
|
||||
Status: "pass",
|
||||
Phase: "retrospective",
|
||||
Skill: "retrospective",
|
||||
Verified: true,
|
||||
Message: "wrote 2 entries to brain",
|
||||
}, nil
|
||||
SessionsDir: t.TempDir(),
|
||||
CompleteFunc: func(_ context.Context, _, _, user string) (string, int64, error) {
|
||||
capturedTask = user
|
||||
return "Key insight: the team resolved a tricky nil pointer issue via careful logging.", 75, nil
|
||||
},
|
||||
})
|
||||
|
||||
@@ -41,9 +34,8 @@ func TestHandle_Retrospective_BuildsPromptWithSessionLog(t *testing.T) {
|
||||
out, err := s.Handle(context.Background(), "retrospective", args)
|
||||
require.NoError(t, err)
|
||||
|
||||
var result iexec.Result
|
||||
var result map[string]any
|
||||
require.NoError(t, json.Unmarshal(out, &result))
|
||||
assert.Equal(t, "pass", result.Status)
|
||||
assert.Contains(t, capturedReq.SkillPrompt, "retrospective discipline")
|
||||
assert.Contains(t, capturedReq.TaskPrompt, "empty-session")
|
||||
assert.Contains(t, result["text"], "nil pointer")
|
||||
assert.Contains(t, capturedTask, "empty-session")
|
||||
}
|
||||
|
||||
@@ -5,19 +5,18 @@ import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
|
||||
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
||||
"github.com/mathiasbq/supervisor/internal/registry"
|
||||
)
|
||||
|
||||
// ExecutorFn allows injecting a test double for the subprocess executor.
|
||||
type ExecutorFn func(ctx context.Context, req iexec.Request) (iexec.Result, error)
|
||||
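// CompleteFunc sends a system prompt and a user prompt to the named local model and returns the reply text, a duration (reported by Handle as duration_ms), and any error.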
|
||||
type CompleteFunc func(ctx context.Context, model, system, user string) (string, int64, error)
|
||||
|
||||
// Config holds retrospective skill configuration.
|
||||
type Config struct {
|
||||
SkillPrompt string // content of retrospective.md
|
||||
DefaultModel string // model to use when not specified in args
|
||||
SessionsDir string // path to brain/sessions/
|
||||
ExecutorFn ExecutorFn // injected executor
|
||||
SkillPrompt string
|
||||
DefaultModel string
|
||||
SessionsDir string
|
||||
CompleteFunc CompleteFunc
|
||||
}
|
||||
|
||||
// Skill implements registry.Skill for the retrospective tool.
|
||||
@@ -36,7 +35,7 @@ func (s *Skill) Tools() []registry.ToolDef {
|
||||
return []registry.ToolDef{
|
||||
{
|
||||
Name: "retrospective",
|
||||
Description: "Run a retrospective on a completed session. Reads the session log, identifies novel learnings, and writes structured entries to the brain for ingestion. Call at the end of each coding session.",
|
||||
Description: "Consult a local model to analyse a completed session and identify what is novel or worth preserving as organizational knowledge.",
|
||||
InputSchema: json.RawMessage(`{
|
||||
"type": "object",
|
||||
"required": ["session_id"],
|
||||
|
||||
@@ -6,8 +6,9 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
||||
"github.com/mathiasbq/supervisor/internal/brain"
|
||||
"github.com/mathiasbq/supervisor/internal/session"
|
||||
)
|
||||
|
||||
@@ -40,42 +41,43 @@ func (s *Skill) Handle(ctx context.Context, tool string, args json.RawMessage) (
|
||||
model = s.cfg.DefaultModel
|
||||
}
|
||||
|
||||
brainCtx, _ := brain.Query(ctx, s.cfg.IngestBaseURL, strings.Join(a.Files, " ")+" "+a.Context, 3)
|
||||
|
||||
task := fmt.Sprintf(
|
||||
"phase: review\nproject_root: %s\nfiles: %s\ncontext: %s\nmodel: %s",
|
||||
a.ProjectRoot, strings.Join(a.Files, ", "), a.Context, model,
|
||||
)
|
||||
task = s.prependHistory(a.SessionID, "review", task)
|
||||
task = session.PrependHistory(s.cfg.SessionsDir, a.SessionID, "review", task)
|
||||
if brainCtx != "" {
|
||||
task = brainCtx + "\n---\n\n" + task
|
||||
}
|
||||
|
||||
if s.cfg.ExecutorFn == nil {
|
||||
if s.cfg.CompleteFunc == nil {
|
||||
return nil, fmt.Errorf("no executor configured")
|
||||
}
|
||||
result, err := s.cfg.ExecutorFn(ctx, iexec.Request{
|
||||
SkillPrompt: s.cfg.SkillPrompt,
|
||||
TaskPrompt: task,
|
||||
Model: model,
|
||||
Tools: "Read,Bash",
|
||||
})
|
||||
t0 := time.Now()
|
||||
text, dur, err := s.cfg.CompleteFunc(ctx, model, s.cfg.SkillPrompt, task)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
b, err := json.Marshal(result)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("marshal result: %w", err)
|
||||
}
|
||||
return b, nil
|
||||
}
|
||||
|
||||
func (s *Skill) prependHistory(sessionID, currentPhase, task string) string {
|
||||
if sessionID == "" || s.cfg.SessionsDir == "" {
|
||||
return task
|
||||
if a.SessionID != "" && s.cfg.SessionsDir != "" {
|
||||
msg := text
|
||||
if len(msg) > 200 {
|
||||
msg = msg[:200]
|
||||
}
|
||||
_ = session.Append(s.cfg.SessionsDir, a.SessionID, session.Entry{
|
||||
SessionID: a.SessionID,
|
||||
Timestamp: time.Now(),
|
||||
Skill: "review",
|
||||
Phase: "review",
|
||||
ProjectRoot: a.ProjectRoot,
|
||||
FinalStatus: "ok",
|
||||
ModelUsed: model,
|
||||
DurationMs: time.Since(t0).Milliseconds(),
|
||||
Message: msg,
|
||||
})
|
||||
}
|
||||
entries, err := session.Read(s.cfg.SessionsDir, sessionID)
|
||||
if err != nil || len(entries) == 0 {
|
||||
return task
|
||||
}
|
||||
history := session.FormatHistory(entries, currentPhase)
|
||||
if history == "" {
|
||||
return task
|
||||
}
|
||||
return history + "\n---\n\n" + task
|
||||
|
||||
return json.Marshal(map[string]any{"text": text, "model": model, "duration_ms": dur})
|
||||
}
|
||||
|
||||
@@ -6,7 +6,6 @@ import (
|
||||
"encoding/json"
|
||||
"testing"
|
||||
|
||||
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
||||
"github.com/mathiasbq/supervisor/internal/skills/review"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
@@ -33,29 +32,22 @@ func TestReviewRequiresFiles(t *testing.T) {
|
||||
assert.ErrorContains(t, err, "files")
|
||||
}
|
||||
|
||||
func TestReviewCallsExecutor(t *testing.T) {
|
||||
called := false
|
||||
func TestReviewCallsCompleteFunc(t *testing.T) {
|
||||
var capturedTask string
|
||||
fakeFn := func(_ context.Context, req iexec.Request) (iexec.Result, error) {
|
||||
called = true
|
||||
capturedTask = req.TaskPrompt
|
||||
return iexec.Result{
|
||||
Status: "pass", Phase: "review", Skill: "review",
|
||||
Verified: true, ModelUsed: "self", Message: "2 warnings found",
|
||||
}, nil
|
||||
fakeFn := func(_ context.Context, _, _, user string) (string, int64, error) {
|
||||
capturedTask = user
|
||||
return "2 warnings found: missing error handling at line 42", 80, nil
|
||||
}
|
||||
|
||||
sk := review.New(review.Config{SkillPrompt: "review rules", ExecutorFn: fakeFn, SessionsDir: t.TempDir()})
|
||||
sk := review.New(review.Config{SkillPrompt: "review rules", CompleteFunc: fakeFn, SessionsDir: t.TempDir()})
|
||||
out, err := sk.Handle(context.Background(), "review", json.RawMessage(
|
||||
`{"project_root":"/tmp/proj","files":["internal/foo/foo.go"],"context":"PR: add Foo helper"}`,
|
||||
))
|
||||
require.NoError(t, err)
|
||||
assert.True(t, called)
|
||||
assert.Contains(t, capturedTask, "internal/foo/foo.go")
|
||||
assert.Contains(t, capturedTask, "PR: add Foo helper")
|
||||
|
||||
var result iexec.Result
|
||||
var result map[string]any
|
||||
require.NoError(t, json.Unmarshal(out, &result))
|
||||
assert.Equal(t, "pass", result.Status)
|
||||
assert.Equal(t, "review", result.Phase)
|
||||
assert.Contains(t, result["text"], "2 warnings found")
|
||||
}
|
||||
|
||||
@@ -5,19 +5,19 @@ import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
|
||||
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
||||
"github.com/mathiasbq/supervisor/internal/registry"
|
||||
)
|
||||
|
||||
// ExecutorFn is the function signature for running a worker subprocess.
|
||||
type ExecutorFn func(ctx context.Context, req iexec.Request) (iexec.Result, error)
|
||||
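// CompleteFunc sends a system prompt and a user prompt to the named local model and returns the reply text, a duration (reported by Handle as duration_ms), and any error.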
|
||||
type CompleteFunc func(ctx context.Context, model, system, user string) (string, int64, error)
|
||||
|
||||
// Config holds dependencies for the review skill.
|
||||
type Config struct {
|
||||
SkillPrompt string
|
||||
DefaultModel string
|
||||
ExecutorFn ExecutorFn
|
||||
SessionsDir string
|
||||
SkillPrompt string
|
||||
DefaultModel string
|
||||
CompleteFunc CompleteFunc
|
||||
SessionsDir string
|
||||
IngestBaseURL string
|
||||
}
|
||||
|
||||
// Skill implements the review MCP tool.
|
||||
@@ -39,7 +39,7 @@ func (s *Skill) Tools() []registry.ToolDef {
|
||||
return []registry.ToolDef{
|
||||
{
|
||||
Name: "review",
|
||||
Description: "Perform a structured code review of the specified files. Returns findings with severity levels.",
|
||||
Description: "Consult a local model for a structured code review of the specified files. Returns findings with severity levels.",
|
||||
InputSchema: schema(
|
||||
[]string{"project_root", "files"},
|
||||
map[string]any{
|
||||
|
||||
@@ -5,8 +5,9 @@ import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
||||
"github.com/mathiasbq/supervisor/internal/brain"
|
||||
"github.com/mathiasbq/supervisor/internal/session"
|
||||
)
|
||||
|
||||
@@ -44,42 +45,43 @@ func (s *Skill) Handle(ctx context.Context, tool string, args json.RawMessage) (
|
||||
model = s.cfg.DefaultModel
|
||||
}
|
||||
|
||||
brainCtx, _ := brain.Query(ctx, s.cfg.IngestBaseURL, a.Requirements+" "+a.Context, 3)
|
||||
|
||||
task := fmt.Sprintf(
|
||||
"phase: spec\nproject_root: %s\nrequirements: %s\noutput_path: %s\ncontext: %s\nmodel: %s",
|
||||
a.ProjectRoot, a.Requirements, outputPath, a.Context, model,
|
||||
)
|
||||
task = s.prependHistory(a.SessionID, "spec", task)
|
||||
task = session.PrependHistory(s.cfg.SessionsDir, a.SessionID, "spec", task)
|
||||
if brainCtx != "" {
|
||||
task = brainCtx + "\n---\n\n" + task
|
||||
}
|
||||
|
||||
if s.cfg.ExecutorFn == nil {
|
||||
if s.cfg.CompleteFunc == nil {
|
||||
return nil, fmt.Errorf("no executor configured")
|
||||
}
|
||||
result, err := s.cfg.ExecutorFn(ctx, iexec.Request{
|
||||
SkillPrompt: s.cfg.SkillPrompt,
|
||||
TaskPrompt: task,
|
||||
Model: model,
|
||||
Tools: "Read,Write",
|
||||
})
|
||||
t0 := time.Now()
|
||||
text, dur, err := s.cfg.CompleteFunc(ctx, model, s.cfg.SkillPrompt, task)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
b, err := json.Marshal(result)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("marshal result: %w", err)
|
||||
}
|
||||
return b, nil
|
||||
}
|
||||
|
||||
func (s *Skill) prependHistory(sessionID, currentPhase, task string) string {
|
||||
if sessionID == "" || s.cfg.SessionsDir == "" {
|
||||
return task
|
||||
if a.SessionID != "" && s.cfg.SessionsDir != "" {
|
||||
msg := text
|
||||
if len(msg) > 200 {
|
||||
msg = msg[:200]
|
||||
}
|
||||
_ = session.Append(s.cfg.SessionsDir, a.SessionID, session.Entry{
|
||||
SessionID: a.SessionID,
|
||||
Timestamp: time.Now(),
|
||||
Skill: "spec",
|
||||
Phase: "spec",
|
||||
ProjectRoot: a.ProjectRoot,
|
||||
FinalStatus: "ok",
|
||||
ModelUsed: model,
|
||||
DurationMs: time.Since(t0).Milliseconds(),
|
||||
Message: msg,
|
||||
})
|
||||
}
|
||||
entries, err := session.Read(s.cfg.SessionsDir, sessionID)
|
||||
if err != nil || len(entries) == 0 {
|
||||
return task
|
||||
}
|
||||
history := session.FormatHistory(entries, currentPhase)
|
||||
if history == "" {
|
||||
return task
|
||||
}
|
||||
return history + "\n---\n\n" + task
|
||||
|
||||
return json.Marshal(map[string]any{"text": text, "model": model, "duration_ms": dur})
|
||||
}
|
||||
|
||||
@@ -6,7 +6,6 @@ import (
|
||||
"encoding/json"
|
||||
"testing"
|
||||
|
||||
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
||||
"github.com/mathiasbq/supervisor/internal/skills/spec"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
@@ -33,29 +32,22 @@ func TestSpecRequiresRequirements(t *testing.T) {
|
||||
assert.ErrorContains(t, err, "requirements")
|
||||
}
|
||||
|
||||
func TestSpecCallsExecutor(t *testing.T) {
|
||||
called := false
|
||||
func TestSpecCallsCompleteFunc(t *testing.T) {
|
||||
var capturedTask string
|
||||
fakeFn := func(_ context.Context, req iexec.Request) (iexec.Result, error) {
|
||||
called = true
|
||||
capturedTask = req.TaskPrompt
|
||||
return iexec.Result{
|
||||
Status: "pass", Phase: "spec", Skill: "spec",
|
||||
FilePath: "/tmp/proj/docs/login-spec.md",
|
||||
Verified: true, ModelUsed: "self", Message: "spec written: login feature",
|
||||
}, nil
|
||||
fakeFn := func(_ context.Context, _, _, user string) (string, int64, error) {
|
||||
capturedTask = user
|
||||
return "# OAuth2 Login Spec\n\n## Overview\nImplement OAuth2 login flow.", 110, nil
|
||||
}
|
||||
|
||||
sk := spec.New(spec.Config{SkillPrompt: "spec rules", ExecutorFn: fakeFn, SessionsDir: t.TempDir()})
|
||||
sk := spec.New(spec.Config{SkillPrompt: "spec rules", CompleteFunc: fakeFn, SessionsDir: t.TempDir()})
|
||||
out, err := sk.Handle(context.Background(), "spec", json.RawMessage(
|
||||
`{"project_root":"/tmp/proj","requirements":"add OAuth2 login","output_path":"docs/login-spec.md"}`,
|
||||
))
|
||||
require.NoError(t, err)
|
||||
assert.True(t, called)
|
||||
assert.Contains(t, capturedTask, "OAuth2 login")
|
||||
assert.Contains(t, capturedTask, "docs/login-spec.md")
|
||||
|
||||
var result iexec.Result
|
||||
var result map[string]any
|
||||
require.NoError(t, json.Unmarshal(out, &result))
|
||||
assert.Equal(t, "spec", result.Phase)
|
||||
assert.Contains(t, result["text"], "OAuth2 Login Spec")
|
||||
}
|
||||
|
||||
@@ -5,19 +5,19 @@ import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
|
||||
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
||||
"github.com/mathiasbq/supervisor/internal/registry"
|
||||
)
|
||||
|
||||
// ExecutorFn is the function signature for running a worker subprocess.
|
||||
type ExecutorFn func(ctx context.Context, req iexec.Request) (iexec.Result, error)
|
||||
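// CompleteFunc sends a system prompt and a user prompt to the named local model and returns the reply text, a duration (reported by Handle as duration_ms), and any error.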
|
||||
type CompleteFunc func(ctx context.Context, model, system, user string) (string, int64, error)
|
||||
|
||||
// Config holds dependencies for the spec skill.
|
||||
type Config struct {
|
||||
SkillPrompt string
|
||||
DefaultModel string
|
||||
ExecutorFn ExecutorFn
|
||||
SessionsDir string
|
||||
SkillPrompt string
|
||||
DefaultModel string
|
||||
CompleteFunc CompleteFunc
|
||||
SessionsDir string
|
||||
IngestBaseURL string
|
||||
}
|
||||
|
||||
// Skill implements the spec MCP tool.
|
||||
@@ -39,7 +39,7 @@ func (s *Skill) Tools() []registry.ToolDef {
|
||||
return []registry.ToolDef{
|
||||
{
|
||||
Name: "spec",
|
||||
Description: "Generate a structured implementation spec from requirements. Writes the spec to output_path in the project.",
|
||||
Description: "Consult a local model to draft a structured implementation spec from requirements. Returns the spec text.",
|
||||
InputSchema: schema(
|
||||
[]string{"project_root", "requirements"},
|
||||
map[string]any{
|
||||
|
||||
@@ -4,8 +4,9 @@ import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
||||
"github.com/mathiasbq/supervisor/internal/brain"
|
||||
"github.com/mathiasbq/supervisor/internal/session"
|
||||
)
|
||||
|
||||
@@ -40,11 +41,16 @@ func (s *Skill) handleRed(ctx context.Context, raw json.RawMessage) (json.RawMes
|
||||
if args.Spec == "" {
|
||||
return nil, fmt.Errorf("spec is required")
|
||||
}
|
||||
brainCtx, _ := brain.Query(ctx, s.cfg.IngestBaseURL, args.Spec, 3)
|
||||
|
||||
task := fmt.Sprintf(
|
||||
"phase: red\nproject_root: %s\nspec: %s\nmodel: %s\ntest_cmd: %s",
|
||||
args.ProjectRoot, args.Spec, s.resolveModel(args.Model), args.TestCmd,
|
||||
)
|
||||
return s.execute(ctx, task)
|
||||
if brainCtx != "" {
|
||||
task = brainCtx + "\n---\n\n" + task
|
||||
}
|
||||
return s.complete(ctx, s.resolveModel(args.Model), task)
|
||||
}
|
||||
|
||||
type greenArgs struct {
|
||||
@@ -70,8 +76,15 @@ func (s *Skill) handleGreen(ctx context.Context, raw json.RawMessage) (json.RawM
|
||||
"phase: green\nproject_root: %s\ntest_path: %s\nmodel: %s\ntest_cmd: %s",
|
||||
args.ProjectRoot, args.TestPath, s.resolveModel(args.Model), args.TestCmd,
|
||||
)
|
||||
task = s.prependHistory(args.SessionID, "green", task)
|
||||
return s.execute(ctx, task)
|
||||
task = session.PrependHistory(s.cfg.SessionsDir, args.SessionID, "green", task)
|
||||
|
||||
t0 := time.Now()
|
||||
result, err := s.complete(ctx, s.resolveModel(args.Model), task)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
s.logEntry(args.SessionID, args.ProjectRoot, "tdd", "green", s.resolveModel(args.Model), t0, result)
|
||||
return result, nil
|
||||
}
|
||||
|
||||
type refactorArgs struct {
|
||||
@@ -101,23 +114,15 @@ func (s *Skill) handleRefactor(ctx context.Context, raw json.RawMessage) (json.R
|
||||
"phase: refactor\nproject_root: %s\ntest_path: %s\nimpl_path: %s\nmodel: %s\ntest_cmd: %s",
|
||||
args.ProjectRoot, args.TestPath, args.ImplPath, s.resolveModel(args.Model), args.TestCmd,
|
||||
)
|
||||
task = s.prependHistory(args.SessionID, "refactor", task)
|
||||
return s.execute(ctx, task)
|
||||
}
|
||||
task = session.PrependHistory(s.cfg.SessionsDir, args.SessionID, "refactor", task)
|
||||
|
||||
func (s *Skill) prependHistory(sessionID, currentPhase, task string) string {
|
||||
if sessionID == "" || s.cfg.SessionsDir == "" {
|
||||
return task
|
||||
t0 := time.Now()
|
||||
result, err := s.complete(ctx, s.resolveModel(args.Model), task)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
entries, err := session.Read(s.cfg.SessionsDir, sessionID)
|
||||
if err != nil || len(entries) == 0 {
|
||||
return task
|
||||
}
|
||||
history := session.FormatHistory(entries, currentPhase)
|
||||
if history == "" {
|
||||
return task
|
||||
}
|
||||
return history + "\n---\n\n" + task
|
||||
s.logEntry(args.SessionID, args.ProjectRoot, "tdd", "refactor", s.resolveModel(args.Model), t0, result)
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func (s *Skill) resolveModel(override string) string {
|
||||
@@ -127,17 +132,42 @@ func (s *Skill) resolveModel(override string) string {
|
||||
return s.cfg.DefaultModel
|
||||
}
|
||||
|
||||
func (s *Skill) execute(ctx context.Context, task string) (json.RawMessage, error) {
|
||||
if s.cfg.ExecutorFn == nil {
|
||||
// complete calls CompleteFunc and returns the text as JSON.
|
||||
func (s *Skill) complete(ctx context.Context, model, task string) (json.RawMessage, error) {
|
||||
if s.cfg.CompleteFunc == nil {
|
||||
return nil, fmt.Errorf("no executor configured")
|
||||
}
|
||||
req := iexec.Request{
|
||||
SkillPrompt: s.cfg.SkillPrompt,
|
||||
TaskPrompt: task,
|
||||
}
|
||||
result, err := s.cfg.ExecutorFn(ctx, req)
|
||||
text, dur, err := s.cfg.CompleteFunc(ctx, model, s.cfg.SkillPrompt, task)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return json.Marshal(result)
|
||||
return json.Marshal(map[string]any{"text": text, "model": model, "duration_ms": dur})
|
||||
}
|
||||
|
||||
// logEntry writes a session.Entry for a completed phase if session_id is set.
|
||||
func (s *Skill) logEntry(sessionID, projectRoot, skill, phase, model string, t0 time.Time, raw json.RawMessage) {
|
||||
if sessionID == "" || s.cfg.SessionsDir == "" {
|
||||
return
|
||||
}
|
||||
var msg string
|
||||
var result struct {
|
||||
Text string `json:"text"`
|
||||
}
|
||||
if err := json.Unmarshal(raw, &result); err == nil && len(result.Text) > 0 {
|
||||
msg = result.Text
|
||||
if len(msg) > 200 {
|
||||
msg = msg[:200]
|
||||
}
|
||||
}
|
||||
_ = session.Append(s.cfg.SessionsDir, sessionID, session.Entry{
|
||||
SessionID: sessionID,
|
||||
Timestamp: time.Now(),
|
||||
Skill: skill,
|
||||
Phase: phase,
|
||||
ProjectRoot: projectRoot,
|
||||
FinalStatus: "ok",
|
||||
ModelUsed: model,
|
||||
DurationMs: time.Since(t0).Milliseconds(),
|
||||
Message: msg,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -5,7 +5,6 @@ import (
|
||||
"encoding/json"
|
||||
"testing"
|
||||
|
||||
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
||||
"github.com/mathiasbq/supervisor/internal/session"
|
||||
"github.com/mathiasbq/supervisor/internal/skills/tdd"
|
||||
"github.com/stretchr/testify/assert"
|
||||
@@ -14,8 +13,7 @@ import (
|
||||
|
||||
func TestTDDSkillTools(t *testing.T) {
|
||||
skill := tdd.New(tdd.Config{
|
||||
SystemPrompt: "supervisor rules",
|
||||
SkillPrompt: "tdd rules",
|
||||
SkillPrompt: "tdd rules",
|
||||
})
|
||||
tools := skill.Tools()
|
||||
names := make([]string, len(tools))
|
||||
@@ -26,19 +24,19 @@ func TestTDDSkillTools(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestTDDSkillHandleUnknown(t *testing.T) {
|
||||
skill := tdd.New(tdd.Config{SystemPrompt: "s", SkillPrompt: "t"})
|
||||
skill := tdd.New(tdd.Config{SkillPrompt: "t"})
|
||||
_, err := skill.Handle(context.Background(), "tdd_unknown", json.RawMessage(`{}`))
|
||||
assert.ErrorContains(t, err, "unknown tool")
|
||||
}
|
||||
|
||||
func TestTDDRedRequiresProjectRoot(t *testing.T) {
|
||||
skill := tdd.New(tdd.Config{SystemPrompt: "s", SkillPrompt: "t"})
|
||||
skill := tdd.New(tdd.Config{SkillPrompt: "t"})
|
||||
_, err := skill.Handle(context.Background(), "tdd_red", json.RawMessage(`{"spec":"add two numbers"}`))
|
||||
assert.ErrorContains(t, err, "project_root")
|
||||
}
|
||||
|
||||
func TestTDDRedRequiresSpec(t *testing.T) {
|
||||
skill := tdd.New(tdd.Config{SystemPrompt: "s", SkillPrompt: "t"})
|
||||
skill := tdd.New(tdd.Config{SkillPrompt: "t"})
|
||||
_, err := skill.Handle(context.Background(), "tdd_red", json.RawMessage(`{"project_root":"/tmp/proj"}`))
|
||||
assert.ErrorContains(t, err, "spec")
|
||||
}
|
||||
@@ -51,35 +49,49 @@ func TestTDDGreenInjectsSessionHistory(t *testing.T) {
|
||||
Message: "wrote failing test for Foo",
|
||||
}))
|
||||
|
||||
var capturedPrompt string
|
||||
fakeFn := func(_ context.Context, req iexec.Request) (iexec.Result, error) {
|
||||
capturedPrompt = req.TaskPrompt
|
||||
return iexec.Result{Status: "pass", Phase: "green", Skill: "tdd", Verified: true, ModelUsed: "self", Message: "ok"}, nil
|
||||
var capturedTask string
|
||||
fakeFn := func(_ context.Context, _, _, user string) (string, int64, error) {
|
||||
capturedTask = user
|
||||
return "here is my suggestion", 100, nil
|
||||
}
|
||||
|
||||
sk := tdd.New(tdd.Config{SkillPrompt: "tdd", ExecutorFn: fakeFn, SessionsDir: sessDir})
|
||||
sk := tdd.New(tdd.Config{SkillPrompt: "tdd", CompleteFunc: fakeFn, SessionsDir: sessDir})
|
||||
_, err := sk.Handle(context.Background(), "tdd_green", json.RawMessage(
|
||||
`{"project_root":"/tmp","test_path":"internal/foo/foo_test.go","test_cmd":"go test ./...","session_id":"sess-1"}`,
|
||||
))
|
||||
require.NoError(t, err)
|
||||
assert.Contains(t, capturedPrompt, "## Session history")
|
||||
assert.Contains(t, capturedPrompt, "wrote failing test for Foo")
|
||||
assert.Contains(t, capturedTask, "## Session history")
|
||||
assert.Contains(t, capturedTask, "wrote failing test for Foo")
|
||||
}
|
||||
|
||||
func TestTDDGreenNoHistoryWhenSessionIDEmpty(t *testing.T) {
|
||||
var capturedPrompt string
|
||||
fakeFn := func(_ context.Context, req iexec.Request) (iexec.Result, error) {
|
||||
capturedPrompt = req.TaskPrompt
|
||||
return iexec.Result{Status: "pass", Phase: "green", Skill: "tdd", Verified: true, ModelUsed: "self", Message: "ok"}, nil
|
||||
var capturedTask string
|
||||
fakeFn := func(_ context.Context, _, _, user string) (string, int64, error) {
|
||||
capturedTask = user
|
||||
return "suggestion", 50, nil
|
||||
}
|
||||
|
||||
sk := tdd.New(tdd.Config{SkillPrompt: "tdd", ExecutorFn: fakeFn, SessionsDir: t.TempDir()})
|
||||
sk := tdd.New(tdd.Config{SkillPrompt: "tdd", CompleteFunc: fakeFn, SessionsDir: t.TempDir()})
|
||||
_, err := sk.Handle(context.Background(), "tdd_green", json.RawMessage(
|
||||
`{"project_root":"/tmp","test_path":"internal/foo/foo_test.go"}`,
|
||||
))
|
||||
require.NoError(t, err)
|
||||
assert.NotContains(t, capturedPrompt, "## Session history")
|
||||
assert.NotContains(t, capturedTask, "## Session history")
|
||||
}
|
||||
|
||||
// Ensure require is used (avoids import error).
|
||||
var _ = require.New
|
||||
func TestTDDGreenReturnsTextJSON(t *testing.T) {
|
||||
fakeFn := func(_ context.Context, _, _, _ string) (string, int64, error) {
|
||||
return "write a func that adds two ints", 42, nil
|
||||
}
|
||||
|
||||
sk := tdd.New(tdd.Config{SkillPrompt: "tdd", CompleteFunc: fakeFn})
|
||||
raw, err := sk.Handle(context.Background(), "tdd_green", json.RawMessage(
|
||||
`{"project_root":"/tmp","test_path":"foo_test.go"}`,
|
||||
))
|
||||
require.NoError(t, err)
|
||||
|
||||
var result map[string]any
|
||||
require.NoError(t, json.Unmarshal(raw, &result))
|
||||
assert.Equal(t, "write a func that adds two ints", result["text"])
|
||||
assert.Equal(t, float64(42), result["duration_ms"])
|
||||
}
|
||||
|
||||
@@ -4,19 +4,18 @@ import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
|
||||
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
||||
"github.com/mathiasbq/supervisor/internal/registry"
|
||||
)
|
||||
|
||||
// ExecutorFn allows injecting a test double for the executor.
|
||||
type ExecutorFn func(ctx context.Context, req iexec.Request) (iexec.Result, error)
|
||||
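// CompleteFunc sends a system prompt and a user prompt to the named local model and returns the reply text, a duration (reported by complete as duration_ms), and any error.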
|
||||
type CompleteFunc func(ctx context.Context, model, system, user string) (string, int64, error)
|
||||
|
||||
type Config struct {
|
||||
SystemPrompt string
|
||||
SkillPrompt string
|
||||
ExecutorFn ExecutorFn // nil = no executor (tests that don't reach execute())
|
||||
DefaultModel string
|
||||
SessionsDir string // optional: path to brain/sessions/ for history injection
|
||||
SkillPrompt string
|
||||
CompleteFunc CompleteFunc // nil = no model function configured (tests that don't reach complete())
|
||||
DefaultModel string
|
||||
SessionsDir string // optional: path to brain/sessions/ for history injection
|
||||
IngestBaseURL string // optional: base URL of ingestion server for brain context
|
||||
}
|
||||
|
||||
type Skill struct {
|
||||
@@ -43,7 +42,7 @@ func (s *Skill) Tools() []registry.ToolDef {
|
||||
return []registry.ToolDef{
|
||||
{
|
||||
Name: "tdd_red",
|
||||
Description: "Write a failing test for the described behavior. Verifies the test fails before returning.",
|
||||
Description: "Consult a local model for help writing a failing test for the described behavior.",
|
||||
InputSchema: schema(
|
||||
[]string{"project_root", "spec"},
|
||||
map[string]any{
|
||||
@@ -56,7 +55,7 @@ func (s *Skill) Tools() []registry.ToolDef {
|
||||
},
|
||||
{
|
||||
Name: "tdd_green",
|
||||
Description: "Write minimal implementation to make the test at test_path pass.",
|
||||
Description: "Consult a local model for implementation ideas to make the test at test_path pass.",
|
||||
InputSchema: schema(
|
||||
[]string{"project_root", "test_path"},
|
||||
map[string]any{
|
||||
@@ -70,7 +69,7 @@ func (s *Skill) Tools() []registry.ToolDef {
|
||||
},
|
||||
{
|
||||
Name: "tdd_refactor",
|
||||
Description: "Refactor the implementation at impl_path while keeping tests green.",
|
||||
Description: "Consult a local model for refactoring suggestions for impl_path while keeping tests green.",
|
||||
InputSchema: schema(
|
||||
[]string{"project_root", "test_path", "impl_path"},
|
||||
map[string]any{
|
||||
|
||||
@@ -5,8 +5,8 @@ import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
||||
"github.com/mathiasbq/supervisor/internal/session"
|
||||
)
|
||||
|
||||
@@ -27,7 +27,7 @@ func (s *Skill) Handle(ctx context.Context, tool string, args json.RawMessage) (
|
||||
if a.SessionID == "" {
|
||||
return nil, fmt.Errorf("session_id is required")
|
||||
}
|
||||
if s.cfg.ExecutorFn == nil {
|
||||
if s.cfg.CompleteFunc == nil {
|
||||
return nil, fmt.Errorf("no executor configured")
|
||||
}
|
||||
|
||||
@@ -41,40 +41,47 @@ func (s *Skill) Handle(ctx context.Context, tool string, args json.RawMessage) (
|
||||
return nil, fmt.Errorf("read session log: %w", err)
|
||||
}
|
||||
|
||||
// ── Step 1: Reader agent ─────────────────────────────────────────────────
|
||||
// ── Step 1: Reader ────────────────────────────────────────────────────────
|
||||
history := session.FormatHistory(entries, "")
|
||||
readerTask := fmt.Sprintf(
|
||||
"role: reader\nsession_id: %s\nbrain_dir: %s\n\n%s",
|
||||
a.SessionID, s.cfg.BrainDir, history,
|
||||
)
|
||||
readerResult, err := s.cfg.ExecutorFn(ctx, iexec.Request{
|
||||
SkillPrompt: s.cfg.ReaderPrompt,
|
||||
TaskPrompt: readerTask,
|
||||
Model: model,
|
||||
Tools: "Read",
|
||||
})
|
||||
readerText, _, err := s.cfg.CompleteFunc(ctx, model, s.cfg.ReaderPrompt, readerTask)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("reader agent: %w", err)
|
||||
return nil, fmt.Errorf("reader: %w", err)
|
||||
}
|
||||
|
||||
// ── Step 2: Writer agent (receives reader candidates) ────────────────────
|
||||
// ── Step 2: Writer (receives reader output) ───────────────────────────────
|
||||
t0 := time.Now()
|
||||
writerTask := fmt.Sprintf(
|
||||
"role: writer\nsession_id: %s\nbrain_dir: %s\n\nreader_summary: %s\nreader_candidates:\n%s",
|
||||
a.SessionID, s.cfg.BrainDir, readerResult.Message, readerResult.RunnerOutput,
|
||||
"role: writer\nsession_id: %s\nbrain_dir: %s\n\nreader_analysis:\n%s",
|
||||
a.SessionID, s.cfg.BrainDir, readerText,
|
||||
)
|
||||
writerResult, err := s.cfg.ExecutorFn(ctx, iexec.Request{
|
||||
SkillPrompt: s.cfg.WriterPrompt,
|
||||
TaskPrompt: writerTask,
|
||||
Model: model,
|
||||
Tools: "Read,Write",
|
||||
})
|
||||
writerText, dur, err := s.cfg.CompleteFunc(ctx, model, s.cfg.WriterPrompt, writerTask)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("writer agent: %w", err)
|
||||
return nil, fmt.Errorf("writer: %w", err)
|
||||
}
|
||||
|
||||
b, err := json.Marshal(writerResult)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("marshal result: %w", err)
|
||||
msg := writerText
|
||||
if len(msg) > 200 {
|
||||
msg = msg[:200]
|
||||
}
|
||||
return b, nil
|
||||
_ = session.Append(s.cfg.SessionsDir, a.SessionID, session.Entry{
|
||||
SessionID: a.SessionID,
|
||||
Timestamp: time.Now(),
|
||||
Skill: "trainer",
|
||||
Phase: "trainer",
|
||||
FinalStatus: "ok",
|
||||
ModelUsed: model,
|
||||
DurationMs: time.Since(t0).Milliseconds(),
|
||||
Message: msg,
|
||||
})
|
||||
|
||||
return json.Marshal(map[string]any{
|
||||
"reader_analysis": readerText,
|
||||
"writer_output": writerText,
|
||||
"model": model,
|
||||
"duration_ms": dur,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -6,7 +6,6 @@ import (
|
||||
"encoding/json"
|
||||
"testing"
|
||||
|
||||
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
||||
"github.com/mathiasbq/supervisor/internal/session"
|
||||
"github.com/mathiasbq/supervisor/internal/skills/trainer"
|
||||
"github.com/stretchr/testify/assert"
|
||||
@@ -31,52 +30,44 @@ func TestTrainerRequiresSessionID(t *testing.T) {
|
||||
func TestTrainerCallsReaderThenWriter(t *testing.T) {
|
||||
sessDir := t.TempDir()
|
||||
require.NoError(t, session.Append(sessDir, "sess-1", session.Entry{
|
||||
SessionID: "sess-1", Skill: "tdd", Phase: "red", FinalStatus: "pass",
|
||||
SessionID: "sess-1", Skill: "tdd", Phase: "red", FinalStatus: "ok",
|
||||
Message: "wrote failing test", FilePath: "internal/foo/foo_test.go",
|
||||
}))
|
||||
|
||||
callCount := 0
|
||||
var readerTask, writerTask string
|
||||
|
||||
fakeFn := func(_ context.Context, req iexec.Request) (iexec.Result, error) {
|
||||
fakeFn := func(_ context.Context, _, sys, user string) (string, int64, error) {
|
||||
callCount++
|
||||
if callCount == 1 {
|
||||
// reader call
|
||||
readerTask = req.TaskPrompt
|
||||
return iexec.Result{
|
||||
Status: "pass", Phase: "trainer", Skill: "trainer",
|
||||
RunnerOutput: `[{"type":"sft","moment":"first-pass clean TDD","score":4}]`,
|
||||
Verified: true, ModelUsed: "self", Message: "1 sft candidate found",
|
||||
}, nil
|
||||
readerTask = user
|
||||
return "1 sft candidate found: first-pass clean TDD", 60, nil
|
||||
}
|
||||
// writer call
|
||||
writerTask = req.TaskPrompt
|
||||
return iexec.Result{
|
||||
Status: "pass", Phase: "trainer", Skill: "trainer",
|
||||
FilePath: sessDir + "/training-data/sft/sess-1.jsonl",
|
||||
Verified: true, ModelUsed: "self", Message: "1 sft pair written",
|
||||
}, nil
|
||||
writerTask = user
|
||||
return "written 1 knowledge entry to brain/knowledge/tdd-patterns.md", 70, nil
|
||||
}
|
||||
|
||||
sk := trainer.New(trainer.Config{
|
||||
ReaderPrompt: "reader rules",
|
||||
WriterPrompt: "writer rules",
|
||||
ExecutorFn: fakeFn,
|
||||
CompleteFunc: fakeFn,
|
||||
SessionsDir: sessDir,
|
||||
BrainDir: t.TempDir(),
|
||||
})
|
||||
out, err := sk.Handle(context.Background(), "trainer", json.RawMessage(`{"session_id":"sess-1"}`))
|
||||
require.NoError(t, err)
|
||||
|
||||
assert.Equal(t, 2, callCount, "executor must be called exactly twice: reader then writer")
|
||||
assert.Equal(t, 2, callCount, "complete must be called exactly twice: reader then writer")
|
||||
assert.Contains(t, readerTask, "role: reader")
|
||||
assert.Contains(t, readerTask, "sess-1")
|
||||
assert.Contains(t, readerTask, "wrote failing test") // session history in reader prompt
|
||||
assert.Contains(t, readerTask, "wrote failing test")
|
||||
assert.Contains(t, writerTask, "role: writer")
|
||||
assert.Contains(t, writerTask, "sft candidate") // reader output passed to writer
|
||||
assert.Contains(t, writerTask, "sft candidate")
|
||||
|
||||
var result iexec.Result
|
||||
var result map[string]any
|
||||
require.NoError(t, json.Unmarshal(out, &result))
|
||||
assert.Equal(t, "trainer", result.Phase)
|
||||
assert.Equal(t, "pass", result.Status)
|
||||
assert.Contains(t, result["reader_analysis"], "sft candidate")
|
||||
assert.Contains(t, result["writer_output"], "knowledge entry")
|
||||
}
|
||||
|
||||
@@ -5,21 +5,20 @@ import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
|
||||
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
||||
"github.com/mathiasbq/supervisor/internal/registry"
|
||||
)
|
||||
|
||||
// ExecutorFn is the function signature for running a worker subprocess.
|
||||
type ExecutorFn func(ctx context.Context, req iexec.Request) (iexec.Result, error)
|
||||
// CompleteFunc is the function used to call a local model.
|
||||
type CompleteFunc func(ctx context.Context, model, system, user string) (string, int64, error)
|
||||
|
||||
// Config holds dependencies for the trainer skill.
|
||||
type Config struct {
|
||||
ReaderPrompt string
|
||||
WriterPrompt string
|
||||
DefaultModel string
|
||||
ExecutorFn ExecutorFn
|
||||
CompleteFunc CompleteFunc
|
||||
SessionsDir string
|
||||
BrainDir string // root of brain/ directory; writer writes to BrainDir/training-data/
|
||||
BrainDir string // root of brain/ directory
|
||||
}
|
||||
|
||||
// Skill implements the trainer MCP tool.
|
||||
@@ -40,7 +39,7 @@ func (s *Skill) Tools() []registry.ToolDef {
|
||||
return []registry.ToolDef{
|
||||
{
|
||||
Name: "trainer",
|
||||
Description: "Extract SFT and DPO training pairs from a session log. Runs a reader→writer chain: reader identifies learning moments, writer formats and writes pairs to brain/training-data/.",
|
||||
Description: "Consult a local model to identify learning moments from a session log and suggest knowledge to preserve in the brain.",
|
||||
InputSchema: schema(
|
||||
[]string{"session_id"},
|
||||
map[string]any{
|
||||
|
||||
Reference in New Issue
Block a user