Compare commits
38 Commits
v0.2.0
...
7d5289ac54
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7d5289ac54 | ||
|
|
3d8fc9dacd | ||
|
|
f9f804cd49 | ||
|
|
85f142ade0 | ||
|
|
0dfad02513 | ||
|
|
c44eb680b2 | ||
|
|
38ada998a2 | ||
|
|
74547c2bdf | ||
|
|
587c0d3b1c | ||
|
|
bb61f2992b | ||
|
|
3ba72d9b28 | ||
|
|
b4f0fbc3ea | ||
|
|
12943ee6f4 | ||
|
|
9af95ebd96 | ||
|
|
f0b567f3e6 | ||
|
|
e3d6cf4cf5 | ||
|
|
df59bd010c | ||
|
|
e5152151d6 | ||
|
|
aa2d57e619 | ||
|
|
6b53706987 | ||
|
|
a0cfc866df | ||
|
|
7bf19b6a7b | ||
|
|
19b019a8d8 | ||
|
|
4ef6a22e28 | ||
|
|
3796cfca87 | ||
|
|
7ce544a051 | ||
|
|
391720155e | ||
|
|
ae6600b8d2 | ||
|
|
6328766c7f | ||
|
|
f1deedd39d | ||
|
|
5cb272a869 | ||
|
|
e96b39a812 | ||
|
|
5db5b33cd7 | ||
|
|
a32457b5bc | ||
|
|
e0be5f0f98 | ||
|
|
6d410b810b | ||
|
|
76f195de2a | ||
|
|
f901d4e67d |
10
.dockerignore
Normal file
10
.dockerignore
Normal file
@@ -0,0 +1,10 @@
|
||||
.git
|
||||
.gitea
|
||||
.worktrees
|
||||
.DS_Store
|
||||
*.log
|
||||
.env*
|
||||
.vscode
|
||||
.idea
|
||||
bin/
|
||||
brain/
|
||||
68
.gitea/workflows/cd.yml
Normal file
68
.gitea/workflows/cd.yml
Normal file
@@ -0,0 +1,68 @@
|
||||
name: cd
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
|
||||
jobs:
|
||||
deploy:
|
||||
name: Build and deploy
|
||||
runs-on: self-hosted
|
||||
env:
|
||||
SERVICE: supervisor
|
||||
IMAGE: gitea.d-ma.be/mathias/supervisor
|
||||
INFRA_REPO: git@gitea.d-ma.be:mathias/infra.git
|
||||
BUILDKIT_HOST: unix:///run/buildkit/buildkitd.sock
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Build and push image
|
||||
run: |
|
||||
set -e
|
||||
trap 'rm -f /tmp/supervisor-image.tar' EXIT
|
||||
IMAGE_TAG="${{ github.sha }}"
|
||||
echo "Building ${IMAGE}:${IMAGE_TAG}"
|
||||
|
||||
# Build to local OCI tar (no registry auth needed at build time)
|
||||
buildctl --addr "${BUILDKIT_HOST}" build \
|
||||
--frontend dockerfile.v0 \
|
||||
--local context=. \
|
||||
--local dockerfile=. \
|
||||
--opt build-arg:VERSION="${IMAGE_TAG}" \
|
||||
--output type=oci,dest=/tmp/supervisor-image.tar
|
||||
|
||||
# Push with skopeo using simple credential flag (avoids OAuth token flow)
|
||||
skopeo copy \
|
||||
oci-archive:/tmp/supervisor-image.tar \
|
||||
docker://${IMAGE}:${IMAGE_TAG} \
|
||||
--dest-creds "${{ secrets.REGISTRY_CREDS }}"
|
||||
|
||||
echo "Built and pushed ${IMAGE}:${IMAGE_TAG}"
|
||||
|
||||
- name: Update infra repo
|
||||
run: |
|
||||
set -e
|
||||
trap 'rm -rf /tmp/infra-update; rm -f ~/.ssh/infra_deploy_key' EXIT
|
||||
IMAGE_TAG="${{ github.sha }}"
|
||||
# Use internal Gitea SSH (runner is on koala — NodePort 30022 is reachable locally)
|
||||
mkdir -p ~/.ssh
|
||||
echo "${{ secrets.INFRA_DEPLOY_KEY }}" > ~/.ssh/infra_deploy_key
|
||||
chmod 600 ~/.ssh/infra_deploy_key
|
||||
printf 'Host gitea.d-ma.be\n HostName 127.0.0.1\n Port 30022\n StrictHostKeyChecking no\n' >> ~/.ssh/config
|
||||
|
||||
GIT_SSH_COMMAND="ssh -i ~/.ssh/infra_deploy_key -o IdentitiesOnly=yes" \
|
||||
git clone "${INFRA_REPO}" /tmp/infra-update
|
||||
|
||||
cd /tmp/infra-update
|
||||
sed -i "s|gitea.d-ma.be/mathias/supervisor:.*|gitea.d-ma.be/mathias/supervisor:${IMAGE_TAG}|" \
|
||||
"k3s/apps/${SERVICE}/deployment.yaml"
|
||||
|
||||
git config user.email "cd-bot@d-ma.be"
|
||||
git config user.name "CD Bot"
|
||||
git add "k3s/apps/${SERVICE}/deployment.yaml"
|
||||
git commit -m "chore(deploy): ${SERVICE} → ${IMAGE_TAG}"
|
||||
GIT_SSH_COMMAND="ssh -i ~/.ssh/infra_deploy_key -o IdentitiesOnly=yes" \
|
||||
git push
|
||||
|
||||
echo "Infra repo updated: ${SERVICE} → ${IMAGE_TAG}"
|
||||
@@ -53,6 +53,6 @@ jobs:
|
||||
chmod 600 ~/.ssh/id_rsa_gh_mirror
|
||||
ssh-keyscan github.com >> ~/.ssh/known_hosts 2>/dev/null
|
||||
GIT_SSH_COMMAND="ssh -i ~/.ssh/id_rsa_gh_mirror -o IdentitiesOnly=yes" \
|
||||
git push git@github.com:mathiasb/hyperguild.git HEAD:main --tags
|
||||
git push git@github.com:mathiasb/hyperguild.git HEAD:main --follow-tags
|
||||
rm ~/.ssh/id_rsa_gh_mirror
|
||||
echo "✓ Mirrored to GitHub"
|
||||
|
||||
50
Dockerfile
Normal file
50
Dockerfile
Normal file
@@ -0,0 +1,50 @@
|
||||
# syntax=docker/dockerfile:1
|
||||
|
||||
# ── Build stage ───────────────────────────────────────────────────────────────
|
||||
FROM golang:1.26-bookworm AS builder
|
||||
|
||||
ARG VERSION=dev
|
||||
WORKDIR /src
|
||||
|
||||
COPY go.mod go.sum ./
|
||||
RUN go mod download
|
||||
|
||||
COPY . .
|
||||
RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 \
|
||||
go build -trimpath -ldflags="-s -w -X main.version=${VERSION}" \
|
||||
-o /out/supervisor ./cmd/supervisor
|
||||
|
||||
# ── Runtime stage ─────────────────────────────────────────────────────────────
|
||||
# Node.js 22 slim — needed for claude CLI subprocess
|
||||
FROM node:22-slim
|
||||
|
||||
# Install claude CLI (provides the `claude` binary the supervisor shells out to)
|
||||
RUN npm install -g @anthropic-ai/claude-code \
|
||||
&& claude --version \
|
||||
&& echo "claude CLI installed"
|
||||
|
||||
# Copy supervisor binary
|
||||
COPY --from=builder /out/supervisor /usr/local/bin/supervisor
|
||||
|
||||
# Bake in config (models.yaml + skill discipline files)
|
||||
COPY config/ /app/config/
|
||||
|
||||
# Run as non-root
|
||||
RUN groupadd -r supervisor && useradd -r -g supervisor -d /app supervisor
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# brain/ is writable state — mount a PersistentVolume here
|
||||
VOLUME /app/brain
|
||||
|
||||
ENV SUPERVISOR_CONFIG_DIR=/app/config/supervisor
|
||||
ENV SUPERVISOR_MODELS_FILE=/app/config/models.yaml
|
||||
ENV SUPERVISOR_BRAIN_DIR=/app/brain
|
||||
ENV SUPERVISOR_SESSIONS_DIR=/app/brain/sessions
|
||||
ENV SUPERVISOR_PORT=3200
|
||||
|
||||
USER supervisor
|
||||
|
||||
EXPOSE 3200
|
||||
|
||||
ENTRYPOINT ["/usr/local/bin/supervisor"]
|
||||
@@ -84,11 +84,28 @@ func main() {
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
executor := iexec.New(iexec.Config{
|
||||
claudeExec := iexec.New(iexec.Config{
|
||||
SystemPrompt: string(systemPrompt),
|
||||
LiteLLMBaseURL: cfg.LiteLLMBaseURL,
|
||||
LiteLLMAPIKey: cfg.LiteLLMAPIKey,
|
||||
})
|
||||
litellmExec := iexec.NewLiteLLM(cfg.LiteLLMBaseURL, cfg.LiteLLMAPIKey, 0)
|
||||
verifier := iexec.NewVerifier("", models.Verifier(), 0)
|
||||
|
||||
buildOrch := func(skill string) func(ctx context.Context, req iexec.Request) (iexec.Result, error) {
|
||||
return func(ctx context.Context, req iexec.Request) (iexec.Result, error) {
|
||||
rawChain := models.ChainFor(skill, req.Model)
|
||||
chain := make([]iexec.ChainEntry, len(rawChain))
|
||||
for i, m := range rawChain {
|
||||
chain[i] = iexec.EntryFor(m)
|
||||
}
|
||||
attempts := make([]iexec.AttemptRecord, 0, len(chain))
|
||||
orch := iexec.NewOrchestrator(chain, litellmExec.Run, claudeExec.Run, verifier, models.LlamaSwapURL(), &attempts)
|
||||
result, err := orch.Run(ctx, req)
|
||||
result.Attempts = attempts // attach orchestration metadata before returning
|
||||
return result, err
|
||||
}
|
||||
}
|
||||
|
||||
tierFn := func(ctx context.Context) tier.Info {
|
||||
return tier.Detect(ctx, "https://api.anthropic.com", cfg.LiteLLMBaseURL)
|
||||
@@ -98,8 +115,8 @@ func main() {
|
||||
reg.Register(tdd.New(tdd.Config{
|
||||
SystemPrompt: string(systemPrompt),
|
||||
SkillPrompt: string(tddPrompt),
|
||||
DefaultModel: models.Resolve("tdd", ""),
|
||||
ExecutorFn: executor.Run,
|
||||
DefaultModel: models.ChainFor("tdd", "")[0],
|
||||
ExecutorFn: buildOrch("tdd"),
|
||||
SessionsDir: cfg.SessionsDir,
|
||||
}))
|
||||
reg.Register(brain.New(brain.Config{
|
||||
@@ -113,33 +130,33 @@ func main() {
|
||||
}))
|
||||
reg.Register(retrospective.New(retrospective.Config{
|
||||
SkillPrompt: string(retroPrompt),
|
||||
DefaultModel: models.Resolve("retrospective", ""),
|
||||
DefaultModel: models.ChainFor("retrospective", "")[0],
|
||||
SessionsDir: cfg.SessionsDir,
|
||||
ExecutorFn: executor.Run,
|
||||
ExecutorFn: buildOrch("retrospective"),
|
||||
}))
|
||||
reg.Register(review.New(review.Config{
|
||||
SkillPrompt: string(reviewPrompt),
|
||||
DefaultModel: models.Resolve("review", ""),
|
||||
ExecutorFn: executor.Run,
|
||||
DefaultModel: models.ChainFor("review", "")[0],
|
||||
ExecutorFn: buildOrch("review"),
|
||||
SessionsDir: cfg.SessionsDir,
|
||||
}))
|
||||
reg.Register(skilldebug.New(skilldebug.Config{
|
||||
SkillPrompt: string(debugPrompt),
|
||||
DefaultModel: models.Resolve("debug", ""),
|
||||
ExecutorFn: executor.Run,
|
||||
DefaultModel: models.ChainFor("debug", "")[0],
|
||||
ExecutorFn: buildOrch("debug"),
|
||||
SessionsDir: cfg.SessionsDir,
|
||||
}))
|
||||
reg.Register(spec.New(spec.Config{
|
||||
SkillPrompt: string(specPrompt),
|
||||
DefaultModel: models.Resolve("spec", ""),
|
||||
ExecutorFn: executor.Run,
|
||||
DefaultModel: models.ChainFor("spec", "")[0],
|
||||
ExecutorFn: buildOrch("spec"),
|
||||
SessionsDir: cfg.SessionsDir,
|
||||
}))
|
||||
reg.Register(trainer.New(trainer.Config{
|
||||
ReaderPrompt: string(trainerReaderPrompt),
|
||||
WriterPrompt: string(trainerWriterPrompt),
|
||||
DefaultModel: models.Resolve("trainer", ""),
|
||||
ExecutorFn: executor.Run,
|
||||
DefaultModel: models.ChainFor("trainer", "")[0],
|
||||
ExecutorFn: buildOrch("trainer"),
|
||||
SessionsDir: cfg.SessionsDir,
|
||||
BrainDir: cfg.BrainDir,
|
||||
}))
|
||||
@@ -149,7 +166,7 @@ func main() {
|
||||
mux.Handle("/mcp", srv)
|
||||
|
||||
addr := ":" + cfg.Port
|
||||
logger.Info("supervisor starting", "addr", addr)
|
||||
logger.Info("supervisor starting", "addr", addr, "version", "v0.4.0")
|
||||
if err := http.ListenAndServe(addr, mux); err != nil {
|
||||
logger.Error("server stopped", "err", err)
|
||||
os.Exit(1)
|
||||
|
||||
@@ -1,13 +1,41 @@
|
||||
# Model routing table — three-layer priority:
|
||||
# 1. model param in MCP tool call (caller override)
|
||||
# 2. per-skill entry here
|
||||
# 3. default (fallback)
|
||||
default: ollama/qwen3-coder-30b-tuned
|
||||
# Model routing chains — three-layer priority:
|
||||
# 1. model param in MCP tool call (caller override — collapses to single entry, no escalation)
|
||||
# 2. per-skill chain here
|
||||
# 3. default_chain fallback
|
||||
|
||||
verifier: claude-sonnet-4-6 # fixed verifier for all local tiers
|
||||
|
||||
llama_swap_url: http://koala:8080 # for warm-state probing
|
||||
|
||||
default_chain:
|
||||
- ollama/qwen3-coder-30b-tuned
|
||||
- claude-sonnet-4-6
|
||||
|
||||
skills:
|
||||
tdd: ollama/qwen3-coder-30b-tuned
|
||||
review: ollama/devstral-tuned
|
||||
debug: ollama/deepseek-r1-tuned
|
||||
retrospective: ollama/qwen3-coder-30b-tuned
|
||||
spec: ollama/qwen3-coder-30b-tuned
|
||||
trainer: ollama/qwen3-coder-30b-tuned
|
||||
tdd:
|
||||
chain:
|
||||
- ollama/qwen3-coder-30b-tuned
|
||||
- claude-sonnet-4-6
|
||||
review:
|
||||
chain:
|
||||
- ollama/devstral-tuned
|
||||
- ollama/gemma4
|
||||
- claude-sonnet-4-6
|
||||
debug:
|
||||
chain:
|
||||
- ollama/deepseek-r1-tuned
|
||||
- claude-sonnet-4-6
|
||||
spec:
|
||||
chain:
|
||||
- ollama/phi4
|
||||
- ollama/gemma4
|
||||
- claude-sonnet-4-6
|
||||
- claude-opus-4-6
|
||||
retrospective:
|
||||
chain:
|
||||
- ollama/qwen3-coder-30b-tuned
|
||||
- claude-sonnet-4-6
|
||||
trainer:
|
||||
chain:
|
||||
- ollama/qwen3-coder-30b-tuned
|
||||
- claude-sonnet-4-6
|
||||
|
||||
923
docs/superpowers/plans/2026-04-20-cd-pipeline.md
Normal file
923
docs/superpowers/plans/2026-04-20-cd-pipeline.md
Normal file
@@ -0,0 +1,923 @@
|
||||
# CD Pipeline Implementation Plan
|
||||
|
||||
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
|
||||
|
||||
**Goal:** Build a GitOps CD pipeline that automatically builds a container image on `main` push and deploys it to k3s on koala via Flux.
|
||||
|
||||
**Architecture:** BuildKit runs as a systemd service on koala (same host as the Gitea runner); CD pushes images to the Gitea registry and commits image tag updates to the infra repo; Flux reconciles within 60s. App secrets (including ANTHROPIC_API_KEY) are SOPS-encrypted in the infra repo and decrypted by Flux at apply time.
|
||||
|
||||
**Tech Stack:** Go 1.26, Node.js 22 (for claude CLI), BuildKit (buildctl), Gitea Actions, Flux (kustomize-controller), SOPS + age, k3s/containerd
|
||||
|
||||
---
|
||||
|
||||
## Environment context
|
||||
|
||||
This plan spans five environments. Each task header notes which environment it runs in:
|
||||
|
||||
- **[this-repo]** — `/Users/mathias/Documents/local-dev/AI/supervisor` on flamingo
|
||||
- **[koala-ssh]** — `ssh koala` (run commands via `ssh koala "..."`)
|
||||
- **[infra-repo]** — `gitea.d-ma.be/mathias/infra` (clone to a temp dir, work there, push)
|
||||
- **[gitea-ui]** — Gitea web UI at `https://gitea.d-ma.be`
|
||||
- **[kubectl]** — kubectl from flamingo (home LAN)
|
||||
|
||||
---
|
||||
|
||||
## File map
|
||||
|
||||
**This repo (supervisor):**
|
||||
- Create: `Dockerfile`
|
||||
- Create: `.gitea/workflows/cd.yml`
|
||||
|
||||
**koala host:**
|
||||
- Create: `/etc/systemd/system/buildkitd.service` (or user-level equivalent)
|
||||
- Create: `/etc/buildkit/buildkitd.toml` (registry config)
|
||||
|
||||
**Infra repo (`gitea.d-ma.be/mathias/infra`):**
|
||||
- Create: `apps/supervisor/namespace.yaml`
|
||||
- Create: `apps/supervisor/deployment.yaml`
|
||||
- Create: `apps/supervisor/service.yaml`
|
||||
- Create: `apps/supervisor/secrets.enc.yaml` (SOPS-encrypted)
|
||||
- Create: `apps/supervisor/kustomization.yaml`
|
||||
- Create: `apps/imagepullsecret/secret.enc.yaml` (SOPS-encrypted)
|
||||
- Create: `apps/imagepullsecret/kustomization.yaml`
|
||||
- Modify: `clusters/koala/kustomization.yaml` (add supervisor + imagepullsecret)
|
||||
- Modify: `flux-system/kustomization.yaml` or relevant Flux Kustomization CRD (add SOPS decryption)
|
||||
|
||||
---
|
||||
|
||||
## Task 1: Dockerfile [this-repo]
|
||||
|
||||
The supervisor binary depends on the `claude` CLI as a subprocess. The image uses a multi-stage build: Go builder stage compiles the binary; the runtime stage is Node.js (for `npm install -g @anthropic-ai/claude-code`). Config files are baked in. The `brain/` directory is a volume mount.
|
||||
|
||||
**Files:**
|
||||
- Create: `Dockerfile`
|
||||
|
||||
- [ ] **Step 1: Verify no Dockerfile exists**
|
||||
|
||||
```bash
|
||||
ls Dockerfile 2>/dev/null || echo "confirmed: no Dockerfile"
|
||||
```
|
||||
|
||||
Expected: `confirmed: no Dockerfile`
|
||||
|
||||
- [ ] **Step 2: Create the Dockerfile**
|
||||
|
||||
```dockerfile
|
||||
# syntax=docker/dockerfile:1
|
||||
|
||||
# ── Build stage ───────────────────────────────────────────────────────────────
|
||||
FROM golang:1.26-bookworm AS builder
|
||||
|
||||
ARG VERSION=dev
|
||||
WORKDIR /src
|
||||
|
||||
COPY go.mod go.sum ./
|
||||
RUN go mod download
|
||||
|
||||
COPY . .
|
||||
RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 \
|
||||
go build -trimpath -ldflags="-s -w -X main.version=${VERSION}" \
|
||||
-o /out/supervisor ./cmd/supervisor
|
||||
|
||||
# ── Runtime stage ─────────────────────────────────────────────────────────────
|
||||
# Node.js 22 slim — needed for claude CLI subprocess
|
||||
FROM node:22-slim
|
||||
|
||||
# Install claude CLI (provides the `claude` binary the supervisor shells out to)
|
||||
RUN npm install -g @anthropic-ai/claude-code \
|
||||
&& claude --version \
|
||||
&& echo "claude CLI installed"
|
||||
|
||||
# Copy supervisor binary
|
||||
COPY --from=builder /out/supervisor /usr/local/bin/supervisor
|
||||
|
||||
# Bake in config (models.yaml + skill discipline files)
|
||||
COPY config/ /app/config/
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# brain/ is writable state — mount a PersistentVolume here
|
||||
VOLUME /app/brain
|
||||
|
||||
ENV SUPERVISOR_CONFIG_DIR=/app/config/supervisor
|
||||
ENV SUPERVISOR_MODELS_FILE=/app/config/models.yaml
|
||||
ENV SUPERVISOR_BRAIN_DIR=/app/brain
|
||||
ENV SUPERVISOR_SESSIONS_DIR=/app/brain/sessions
|
||||
ENV SUPERVISOR_PORT=3200
|
||||
|
||||
EXPOSE 3200
|
||||
|
||||
ENTRYPOINT ["/usr/local/bin/supervisor"]
|
||||
```
|
||||
|
||||
- [ ] **Step 3: Build locally to verify it compiles (no push)**
|
||||
|
||||
```bash
|
||||
# buildctl must be available locally, OR use docker if available on flamingo
|
||||
docker build --target builder -t supervisor-build-test . && echo "build stage OK"
|
||||
# If no docker on flamingo, skip this step and verify at Task 3 on koala instead
|
||||
```
|
||||
|
||||
- [ ] **Step 4: Commit**
|
||||
|
||||
```bash
|
||||
git add Dockerfile
|
||||
git commit -m "feat: add multi-stage Dockerfile with claude CLI runtime"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Task 2: BuildKit systemd service on koala [koala-ssh]
|
||||
|
||||
Install `buildkitd` as a root-level systemd service on koala. The Gitea runner process runs as root (confirmed by PID/cgroup), so the root socket at `/run/buildkit/buildkitd.sock` is accessible to it.
|
||||
|
||||
**Files:**
|
||||
- Create: `/etc/systemd/system/buildkitd.service` on koala
|
||||
- Create: `/etc/buildkit/buildkitd.toml` on koala (registry auth)
|
||||
|
||||
- [ ] **Step 1: Check if buildkitd is already installed**
|
||||
|
||||
```bash
|
||||
ssh koala "buildkitd --version 2>/dev/null || echo 'not installed'"
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Install buildkitd on koala**
|
||||
|
||||
Download the pinned BuildKit release binary (koala is x86_64, so use the `linux-amd64` build):
|
||||
|
||||
```bash
|
||||
ssh koala "
|
||||
BUILDKIT_VERSION=v0.21.0
|
||||
curl -sSL https://github.com/moby/buildkit/releases/download/\${BUILDKIT_VERSION}/buildkit-\${BUILDKIT_VERSION}.linux-amd64.tar.gz \
|
||||
| tar -xz -C /usr/local/
|
||||
buildkitd --version
|
||||
"
|
||||
```
|
||||
|
||||
Expected output includes: `buildkitd github.com/moby/buildkit v0.21.0`
|
||||
|
||||
- [ ] **Step 3: Create buildkitd.toml with Gitea registry auth**
|
||||
|
||||
The `[registry]` block configures how BuildKit talks to `gitea.d-ma.be` (HTTPS, with TLS verification); it does not contain credentials. The actual credentials come from `~/.docker/config.json`, which `buildctl` reads automatically and which is created in Task 3:
|
||||
|
||||
```bash
|
||||
ssh koala "
|
||||
mkdir -p /etc/buildkit
|
||||
cat > /etc/buildkit/buildkitd.toml << 'EOF'
|
||||
[worker.containerd]
|
||||
enabled = false
|
||||
|
||||
[worker.oci]
|
||||
enabled = true
|
||||
|
||||
[registry.\"gitea.d-ma.be\"]
|
||||
http = false
|
||||
insecure = false
|
||||
EOF
|
||||
"
|
||||
```
|
||||
|
||||
- [ ] **Step 4: Create systemd unit**
|
||||
|
||||
```bash
|
||||
ssh koala "
|
||||
cat > /etc/systemd/system/buildkitd.service << 'EOF'
|
||||
[Unit]
|
||||
Description=BuildKit daemon
|
||||
After=network.target
|
||||
|
||||
[Service]
|
||||
Type=notify
|
||||
ExecStart=/usr/local/bin/buildkitd --config /etc/buildkit/buildkitd.toml
|
||||
Restart=on-failure
|
||||
LimitNOFILE=1048576
|
||||
LimitNPROC=1048576
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
EOF
|
||||
systemctl daemon-reload
|
||||
systemctl enable buildkitd
|
||||
systemctl start buildkitd
|
||||
"
|
||||
```
|
||||
|
||||
- [ ] **Step 5: Verify the socket exists and is responsive**
|
||||
|
||||
```bash
|
||||
ssh koala "
|
||||
systemctl status buildkitd --no-pager
|
||||
buildctl --addr unix:///run/buildkit/buildkitd.sock debug info
|
||||
"
|
||||
```
|
||||
|
||||
Expected: service `active (running)`, buildctl shows BuildKit version info.
|
||||
|
||||
- [ ] **Step 6: Smoke-test build with trivial Dockerfile**
|
||||
|
||||
```bash
|
||||
ssh koala "
|
||||
echo 'FROM alpine:3.21
|
||||
RUN echo hello' | buildctl --addr unix:///run/buildkit/buildkitd.sock build \
|
||||
--frontend dockerfile.v0 \
|
||||
--local context=/ \
|
||||
--opt filename=Dockerfile \
|
||||
--output type=image,name=localhost/smoke-test:latest
|
||||
echo 'smoke test OK'
|
||||
"
|
||||
```
|
||||
|
||||
Expected: `smoke test OK`
|
||||
|
||||
---
|
||||
|
||||
## Task 3: Gitea registry push auth for buildctl [koala-ssh]
|
||||
|
||||
`buildctl` reads Docker-style credentials from `/root/.docker/config.json`. Create the credentials file so the runner can push to `gitea.d-ma.be`.
|
||||
|
||||
**Prerequisites:** A Gitea access token with the `write:packages` scope for the `mathias` org (the account password also works, but a scoped token is preferred). Step 1 below creates the token in Gitea → User Settings → Applications → Generate Token (scopes: `write:packages`).
|
||||
|
||||
- [ ] **Step 1: Create Gitea access token**
|
||||
|
||||
In Gitea UI (`https://gitea.d-ma.be`) → top-right avatar → Settings → Applications → Generate New Token.
|
||||
- Token name: `buildkit-push`
|
||||
- Scopes: `write:packages` (container registry write)
|
||||
- Copy the token — it won't be shown again.
|
||||
|
||||
- [ ] **Step 2: Write docker config.json on koala**
|
||||
|
||||
Replace `<TOKEN>` with the token from Step 1:
|
||||
|
||||
```bash
|
||||
ssh koala "
|
||||
mkdir -p /root/.docker
|
||||
TOKEN=<TOKEN>
|
||||
AUTH=\$(echo -n 'mathias:'\${TOKEN} | base64)
|
||||
cat > /root/.docker/config.json << EOF
|
||||
{
|
||||
\"auths\": {
|
||||
\"gitea.d-ma.be\": {
|
||||
\"auth\": \"\${AUTH}\"
|
||||
}
|
||||
}
|
||||
}
|
||||
EOF
|
||||
chmod 600 /root/.docker/config.json
|
||||
echo 'credentials written'
|
||||
"
|
||||
```
|
||||
|
||||
- [ ] **Step 3: Verify push works**
|
||||
|
||||
```bash
|
||||
ssh koala "
|
||||
echo 'FROM alpine:3.21' | buildctl --addr unix:///run/buildkit/buildkitd.sock build \
|
||||
--frontend dockerfile.v0 \
|
||||
--local context=/ \
|
||||
--opt filename=Dockerfile \
|
||||
--output type=image,name=gitea.d-ma.be/mathias/supervisor:push-test,push=true
|
||||
echo 'push OK'
|
||||
"
|
||||
```
|
||||
|
||||
Expected: `push OK`. Verify in Gitea UI: `https://gitea.d-ma.be/mathias/supervisor/packages` should show a `push-test` tag.
|
||||
|
||||
- [ ] **Step 4: Delete the test image tag**
|
||||
|
||||
In Gitea UI → supervisor repo → Packages tab → delete the `push-test` tag.
|
||||
|
||||
---
|
||||
|
||||
## Task 4: age keypair + Flux SOPS decryption [kubectl + flamingo]
|
||||
|
||||
Flux decrypts SOPS-encrypted secrets at apply time. It needs the age private key stored as a k8s Secret in the `flux-system` namespace.
|
||||
|
||||
- [ ] **Step 1: Verify age is installed**
|
||||
|
||||
```bash
|
||||
age --version || brew install age
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Generate age keypair**
|
||||
|
||||
```bash
|
||||
age-keygen -o /tmp/supervisor-age.key
|
||||
cat /tmp/supervisor-age.key
|
||||
```
|
||||
|
||||
Output includes two lines:
|
||||
```
|
||||
# public key: age1xxxxxx...
|
||||
AGE-SECRET-KEY-1xxxxxxx...
|
||||
```
|
||||
|
||||
**Copy the public key** (the `age1...` value) — you'll need it in Task 7 for encrypting secrets.
|
||||
**Store the private key file securely** — back it up outside the cluster (e.g., 1Password or encrypted note).
|
||||
|
||||
- [ ] **Step 3: Create the SOPS age secret in flux-system**
|
||||
|
||||
```bash
|
||||
kubectl create secret generic sops-age \
|
||||
--from-file=age.agekey=/tmp/supervisor-age.key \
|
||||
-n flux-system
|
||||
kubectl get secret sops-age -n flux-system
|
||||
```
|
||||
|
||||
Expected: secret exists with `age.agekey` key.
|
||||
|
||||
- [ ] **Step 4: Shred the temp key file**
|
||||
|
||||
```bash
|
||||
shred -u /tmp/supervisor-age.key
|
||||
```
|
||||
|
||||
- [ ] **Step 5: Check what Flux Kustomization CRDs exist in the infra repo**
|
||||
|
||||
```bash
|
||||
git clone git@gitea.d-ma.be:mathias/infra.git /tmp/infra-sops-setup
|
||||
ls /tmp/infra-sops-setup/flux-system/
|
||||
```
|
||||
|
||||
Look for a `kustomization.yaml` or `gotk-sync.yaml` that defines the main Flux Kustomization resource pointing at the `clusters/koala/` path.
|
||||
|
||||
- [ ] **Step 6: Patch the Flux Kustomization to enable SOPS decryption**
|
||||
|
||||
Find the Kustomization resource that syncs `clusters/koala/`. It will look like:
|
||||
|
||||
```yaml
|
||||
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
||||
kind: Kustomization
|
||||
metadata:
|
||||
name: flux-system
|
||||
namespace: flux-system
|
||||
spec:
|
||||
path: ./clusters/koala
|
||||
...
|
||||
```
|
||||
|
||||
Add the `decryption` block:
|
||||
|
||||
```yaml
|
||||
decryption:
|
||||
provider: sops
|
||||
secretRef:
|
||||
name: sops-age
|
||||
```
|
||||
|
||||
Edit the file in `/tmp/infra-sops-setup/flux-system/` and commit:
|
||||
|
||||
```bash
|
||||
cd /tmp/infra-sops-setup
|
||||
# Edit the relevant Kustomization yaml to add decryption block (shown above)
|
||||
git add flux-system/
|
||||
git commit -m "feat: enable SOPS decryption via age key in flux-system"
|
||||
git push
|
||||
```
|
||||
|
||||
- [ ] **Step 7: Verify Flux picks up the change**
|
||||
|
||||
```bash
|
||||
flux reconcile source git flux-system
|
||||
flux get kustomizations
|
||||
```
|
||||
|
||||
Expected: `flux-system` Kustomization shows `Ready True` with no errors.
|
||||
|
||||
- [ ] **Step 8: Clean up temp clone**
|
||||
|
||||
```bash
|
||||
rm -rf /tmp/infra-sops-setup
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Task 5: Infra repo — supervisor app manifests [infra-repo]
|
||||
|
||||
Create the full k8s manifest set for the supervisor service in the infra repo. The deployment uses an `IMAGE_TAG` placeholder; the CD job patches this with the actual git sha before pushing.
|
||||
|
||||
**Prerequisites:** age public key from Task 4 Step 2.
|
||||
|
||||
- [ ] **Step 1: Clone the infra repo**
|
||||
|
||||
```bash
|
||||
git clone git@gitea.d-ma.be:mathias/infra.git /tmp/infra-supervisor
|
||||
cd /tmp/infra-supervisor
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Create namespace**
|
||||
|
||||
```bash
|
||||
mkdir -p apps/supervisor
|
||||
cat > apps/supervisor/namespace.yaml << 'EOF'
|
||||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: supervisor
|
||||
EOF
|
||||
```
|
||||
|
||||
- [ ] **Step 3: Create deployment**
|
||||
|
||||
The `brain` volume is a `hostPath` on koala (simplest for a single-node service; add a PVC later if needed). The pod uses `imagePullSecrets` to pull the image from the Gitea registry.
|
||||
|
||||
```bash
|
||||
cat > apps/supervisor/deployment.yaml << 'EOF'
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: supervisor
|
||||
namespace: supervisor
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: supervisor
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: supervisor
|
||||
spec:
|
||||
nodeSelector:
|
||||
kubernetes.io/hostname: koala
|
||||
imagePullSecrets:
|
||||
- name: gitea-registry
|
||||
containers:
|
||||
- name: supervisor
|
||||
image: gitea.d-ma.be/mathias/supervisor:IMAGE_TAG
|
||||
ports:
|
||||
- containerPort: 3200
|
||||
envFrom:
|
||||
- secretRef:
|
||||
name: supervisor-secrets
|
||||
env:
|
||||
- name: SUPERVISOR_PORT
|
||||
value: "3200"
|
||||
- name: LITELLM_BASE_URL
|
||||
value: "http://iguana:4000"
|
||||
- name: LLAMA_SWAP_URL
|
||||
value: "http://koala:8080"
|
||||
- name: INGEST_BASE_URL
|
||||
value: "http://localhost:3300"
|
||||
volumeMounts:
|
||||
- name: brain
|
||||
mountPath: /app/brain
|
||||
volumes:
|
||||
- name: brain
|
||||
hostPath:
|
||||
path: /var/lib/supervisor/brain
|
||||
type: DirectoryOrCreate
|
||||
EOF
|
||||
```
|
||||
|
||||
- [ ] **Step 4: Create service**
|
||||
|
||||
```bash
|
||||
cat > apps/supervisor/service.yaml << 'EOF'
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: supervisor
|
||||
namespace: supervisor
|
||||
spec:
|
||||
selector:
|
||||
app: supervisor
|
||||
ports:
|
||||
- port: 3200
|
||||
targetPort: 3200
|
||||
type: ClusterIP
|
||||
EOF
|
||||
```
|
||||
|
||||
- [ ] **Step 5: Create kustomization.yaml for supervisor**
|
||||
|
||||
```bash
|
||||
cat > apps/supervisor/kustomization.yaml << 'EOF'
|
||||
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||
kind: Kustomization
|
||||
resources:
|
||||
- namespace.yaml
|
||||
- deployment.yaml
|
||||
- service.yaml
|
||||
- secrets.enc.yaml
|
||||
EOF
|
||||
```
|
||||
|
||||
- [ ] **Step 6: Ensure clusters/koala/kustomization.yaml exists and includes supervisor**
|
||||
|
||||
Check if the file exists:
|
||||
|
||||
```bash
|
||||
cat clusters/koala/kustomization.yaml 2>/dev/null || echo "need to create"
|
||||
```
|
||||
|
||||
If it exists, add supervisor and imagepullsecret resources. If it does not exist, create it:
|
||||
|
||||
```bash
|
||||
cat > clusters/koala/kustomization.yaml << 'EOF'
|
||||
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||
kind: Kustomization
|
||||
resources:
|
||||
- ../../apps/imagepullsecret
|
||||
- ../../apps/supervisor
|
||||
EOF
|
||||
```
|
||||
|
||||
If it already exists, add the two resource lines (preserving existing entries).
|
||||
|
||||
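- [ ] **Step 7: Commit (without secrets — those come in Task 6; until Task 6 is pushed, expect Flux to report a build error because `secrets.enc.yaml` and `apps/imagepullsecret` do not exist yet)**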
|
||||
|
||||
```bash
|
||||
cd /tmp/infra-supervisor
|
||||
git add apps/supervisor/ clusters/koala/
|
||||
git commit -m "feat(supervisor): add k8s manifests for supervisor service"
|
||||
git push
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Task 6: SOPS-encrypted secrets in infra repo [infra-repo + flamingo]
|
||||
|
||||
Two encrypted secret files: the imagePullSecret for the Gitea container registry, and the supervisor app secrets (ANTHROPIC_API_KEY, LITELLM_API_KEY).
|
||||
|
||||
**Prerequisites:**
|
||||
- age public key from Task 4 Step 2 (format: `age1xxxxx...`)
|
||||
- `sops` installed (`brew install sops` if missing)
|
||||
- Gitea registry token (same one used in Task 3, or create a read-only one for pulling)
|
||||
|
||||
- [ ] **Step 1: Verify sops is installed**
|
||||
|
||||
```bash
|
||||
sops --version || brew install sops
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Create .sops.yaml in infra repo root**
|
||||
|
||||
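This tells sops which key to use for all files in the repo. Consider also adding `encrypted_regex: ^(data|stringData)$` to the rule so that only the Secret payload is encrypted — Flux's kustomize-controller needs `apiVersion`, `kind`, and `metadata` to stay in plaintext: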
|
||||
|
||||
```bash
|
||||
cd /tmp/infra-supervisor
|
||||
cat > .sops.yaml << 'EOF'
|
||||
creation_rules:
|
||||
- age: age1REPLACE_WITH_YOUR_PUBLIC_KEY
|
||||
EOF
|
||||
git add .sops.yaml
|
||||
git commit -m "chore: add sops config (age key)"
|
||||
git push
|
||||
```
|
||||
|
||||
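Before running `git add` / `git commit`, replace `age1REPLACE_WITH_YOUR_PUBLIC_KEY` in `.sops.yaml` with the actual age public key from Task 4 Step 2; otherwise the placeholder is committed and later `sops --encrypt` calls cannot encrypt for your key.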
|
||||
|
||||
- [ ] **Step 3: Create and encrypt the imagePullSecret**
|
||||
|
||||
The imagePullSecret is created directly in the `supervisor` namespace (any other namespace that pulls from the Gitea registry will need its own copy, e.g. via a Kustomize overlay). Create it in the `imagepullsecret` app:
|
||||
|
||||
```bash
|
||||
mkdir -p apps/imagepullsecret
|
||||
|
||||
# Create a registry pull token in Gitea: Settings → Applications → Generate Token
|
||||
# Scopes: read:packages
|
||||
# Use that token here (or reuse the buildkit-push token — read access is enough for pulling)
|
||||
PULL_TOKEN=<gitea-read-packages-token>
|
||||
PULL_AUTH=$(echo -n "mathias:${PULL_TOKEN}" | base64)
|
||||
|
||||
cat > /tmp/gitea-pull-secret.yaml << EOF
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: gitea-registry
|
||||
namespace: supervisor
|
||||
type: kubernetes.io/dockerconfigjson
|
||||
stringData:
|
||||
.dockerconfigjson: |
|
||||
{
|
||||
"auths": {
|
||||
"gitea.d-ma.be": {
|
||||
"auth": "${PULL_AUTH}"
|
||||
}
|
||||
}
|
||||
}
|
||||
EOF
|
||||
|
||||
sops --encrypt /tmp/gitea-pull-secret.yaml > apps/imagepullsecret/secret.enc.yaml
|
||||
rm /tmp/gitea-pull-secret.yaml
|
||||
|
||||
cat > apps/imagepullsecret/kustomization.yaml << 'EOF'
|
||||
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||
kind: Kustomization
|
||||
resources:
|
||||
- secret.enc.yaml
|
||||
EOF
|
||||
```
|
||||
|
||||
Verify the encrypted file looks correct (should show `sops:` metadata at the bottom):
|
||||
|
||||
```bash
|
||||
tail -20 apps/imagepullsecret/secret.enc.yaml
|
||||
```
|
||||
|
||||
- [ ] **Step 4: Create and encrypt supervisor app secrets**
|
||||
|
||||
```bash
|
||||
# ANTHROPIC_API_KEY: your Anthropic API key
|
||||
# LITELLM_API_KEY: the key your LiteLLM instance expects (can be any string if it's local)
|
||||
cat > /tmp/supervisor-secrets.yaml << 'EOF'
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: supervisor-secrets
|
||||
namespace: supervisor
|
||||
type: Opaque
|
||||
stringData:
|
||||
ANTHROPIC_API_KEY: "REPLACE_WITH_REAL_KEY"
|
||||
LITELLM_API_KEY: "REPLACE_WITH_REAL_KEY"
|
||||
EOF
|
||||
|
||||
# Edit /tmp/supervisor-secrets.yaml to insert real values, then:
|
||||
sops --encrypt /tmp/supervisor-secrets.yaml > apps/supervisor/secrets.enc.yaml
|
||||
rm /tmp/supervisor-secrets.yaml
|
||||
```
|
||||
|
||||
Verify:
|
||||
|
||||
```bash
|
||||
tail -20 apps/supervisor/secrets.enc.yaml
|
||||
# Should show encrypted values and sops metadata
|
||||
```
|
||||
|
||||
- [ ] **Step 5: Commit encrypted secrets**
|
||||
|
||||
```bash
|
||||
cd /tmp/infra-supervisor
|
||||
git add apps/imagepullsecret/ apps/supervisor/secrets.enc.yaml .sops.yaml
|
||||
git commit -m "feat: add SOPS-encrypted imagePullSecret and supervisor app secrets"
|
||||
git push
|
||||
```
|
||||
|
||||
- [ ] **Step 6: Verify Flux reconciles and creates the secrets**
|
||||
|
||||
Wait ~60s then:
|
||||
|
||||
```bash
|
||||
flux reconcile kustomization flux-system --with-source
|
||||
kubectl get secrets -n supervisor
|
||||
```
|
||||
|
||||
Expected: `gitea-registry` and `supervisor-secrets` appear in the `supervisor` namespace.
|
||||
|
||||
- [ ] **Step 7: Clean up temp clone**
|
||||
|
||||
```bash
|
||||
rm -rf /tmp/infra-supervisor
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Task 7: Gitea org-level secrets [gitea-ui + koala-ssh]
|
||||
|
||||
Set the two secrets (`INFRA_DEPLOY_KEY` and `BUILDKIT_REGISTRY_AUTH`) that all repos in the `mathias` org will inherit. These go in the Gitea org (not individual repos).
|
||||
|
||||
**Files:** No files — Gitea UI configuration.
|
||||
|
||||
- [ ] **Step 1: Generate SSH deploy key for infra repo**
|
||||
|
||||
On flamingo:
|
||||
|
||||
```bash
|
||||
ssh-keygen -t ed25519 -C "cd-bot infra deploy key" -f /tmp/infra-deploy-key -N ""
|
||||
cat /tmp/infra-deploy-key # private key → INFRA_DEPLOY_KEY secret
|
||||
cat /tmp/infra-deploy-key.pub # public key → add to Gitea infra repo as deploy key
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Add public key to infra repo as a deploy key (write access)**
|
||||
|
||||
In Gitea UI: `https://gitea.d-ma.be/mathias/infra` → Settings → Deploy Keys → Add Deploy Key.
|
||||
- Title: `cd-bot`
|
||||
- Key: paste content of `/tmp/infra-deploy-key.pub`
|
||||
- Enable write access: ✓
|
||||
|
||||
- [ ] **Step 3: Set org-level secrets in Gitea**
|
||||
|
||||
In Gitea UI: `https://gitea.d-ma.be/org/mathias/settings/secrets` → Add Secret.
|
||||
|
||||
Set these two secrets:
|
||||
|
||||
| Secret name | Value |
|
||||
|-------------|-------|
|
||||
| `INFRA_DEPLOY_KEY` | content of `/tmp/infra-deploy-key` (private key, including `-----BEGIN...` lines) |
|
||||
| `BUILDKIT_REGISTRY_AUTH` | same base64 auth string as used in Task 3 Step 2 (format: `mathias:<token>` base64-encoded) |
|
||||
|
||||
Note: `BUILDKIT_REGISTRY_AUTH` is redundant if `/root/.docker/config.json` is already on the runner host from Task 3 — but setting it as a secret allows the `cd.yml` to explicitly pass it to `buildctl` for clarity and rotation.
|
||||
|
||||
- [ ] **Step 4: Clean up temp key files**
|
||||
|
||||
```bash
|
||||
shred -u /tmp/infra-deploy-key /tmp/infra-deploy-key.pub
|
||||
```
|
||||
|
||||
- [ ] **Step 5: Verify secrets appear in Gitea**
|
||||
|
||||
In Gitea UI: `https://gitea.d-ma.be/org/mathias/settings/secrets` — confirm both secrets are listed (values are hidden, only names shown).
|
||||
|
||||
---
|
||||
|
||||
## Task 8: cd.yml workflow [this-repo]
|
||||
|
||||
Create the CD workflow that triggers after CI passes, builds the image with buildctl, and commits the updated tag to the infra repo.
|
||||
|
||||
**Files:**
|
||||
- Create: `.gitea/workflows/cd.yml`
|
||||
|
||||
- [ ] **Step 1: Create cd.yml**
|
||||
|
||||
```bash
|
||||
cat > .gitea/workflows/cd.yml << 'EOF'
|
||||
name: cd
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
|
||||
jobs:
|
||||
deploy:
|
||||
name: Build and deploy
|
||||
needs: [check] # 'check' is the job name in ci.yml
|
||||
runs-on: self-hosted
|
||||
env:
|
||||
SERVICE: supervisor
|
||||
REGISTRY: gitea.d-ma.be
|
||||
IMAGE: gitea.d-ma.be/mathias/supervisor
|
||||
INFRA_REPO: git@gitea.d-ma.be:mathias/infra.git
|
||||
BUILDKIT_HOST: unix:///run/buildkit/buildkitd.sock
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Build and push image
|
||||
run: |
|
||||
IMAGE_TAG="${{ github.sha }}"
|
||||
echo "Building ${IMAGE}:${IMAGE_TAG}"
|
||||
buildctl --addr "${BUILDKIT_HOST}" build \
|
||||
--frontend dockerfile.v0 \
|
||||
--local context=. \
|
||||
--local dockerfile=. \
|
||||
--opt build-arg:VERSION="${IMAGE_TAG}" \
|
||||
--output "type=image,name=${IMAGE}:${IMAGE_TAG},push=true"
|
||||
echo "IMAGE_TAG=${IMAGE_TAG}" >> $GITHUB_OUTPUT
|
||||
id: build
|
||||
|
||||
- name: Update infra repo
|
||||
run: |
|
||||
IMAGE_TAG="${{ github.sha }}"
|
||||
# Write SSH key for infra repo
|
||||
mkdir -p ~/.ssh
|
||||
echo "${{ secrets.INFRA_DEPLOY_KEY }}" > ~/.ssh/infra_deploy_key
|
||||
chmod 600 ~/.ssh/infra_deploy_key
|
||||
ssh-keyscan gitea.d-ma.be >> ~/.ssh/known_hosts 2>/dev/null
|
||||
|
||||
# Clone infra repo
|
||||
GIT_SSH_COMMAND="ssh -i ~/.ssh/infra_deploy_key -o IdentitiesOnly=yes" \
|
||||
git clone "${INFRA_REPO}" /tmp/infra-update
|
||||
|
||||
# Patch the image tag
|
||||
cd /tmp/infra-update
|
||||
sed -i "s|gitea.d-ma.be/mathias/supervisor:.*|gitea.d-ma.be/mathias/supervisor:${IMAGE_TAG}|" \
|
||||
"apps/${SERVICE}/deployment.yaml"
|
||||
|
||||
# Commit and push
|
||||
git config user.email "cd-bot@d-ma.be"
|
||||
git config user.name "CD Bot"
|
||||
git add "apps/${SERVICE}/deployment.yaml"
|
||||
git commit -m "chore(deploy): ${SERVICE} → ${IMAGE_TAG}"
|
||||
GIT_SSH_COMMAND="ssh -i ~/.ssh/infra_deploy_key -o IdentitiesOnly=yes" \
|
||||
git push
|
||||
|
||||
# Clean up
|
||||
rm -rf /tmp/infra-update
|
||||
rm ~/.ssh/infra_deploy_key
|
||||
echo "Infra repo updated: ${SERVICE} → ${IMAGE_TAG}"
|
||||
EOF
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Verify the `needs` job name matches ci.yml**
|
||||
|
||||
```bash
|
||||
grep "^ [a-z].*:$" .gitea/workflows/ci.yml
|
||||
```
|
||||
|
||||
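The output should show `check:` as the quality-gate job name. The `cd.yml` uses `needs: [check]` — confirm this matches. Note that `needs` only resolves jobs defined in the same workflow file: if `check` exists only in `ci.yml`, the gate cannot be satisfied from `cd.yml`, so either move the deploy job into `ci.yml` or remove the `needs` line and gate the deploy another way.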
|
||||
|
||||
- [ ] **Step 3: Commit**
|
||||
|
||||
```bash
|
||||
git add .gitea/workflows/cd.yml
|
||||
git commit -m "feat: add CD workflow (buildctl → Gitea registry → infra repo update)"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Task 9: End-to-end smoke test
|
||||
|
||||
Trigger the full pipeline and verify each stage.
|
||||
|
||||
- [ ] **Step 1: Push to main to trigger CI + CD**
|
||||
|
||||
```bash
|
||||
git push origin main
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Monitor CI job in Gitea**
|
||||
|
||||
Open `https://gitea.d-ma.be/mathias/supervisor/actions` — wait for the `ci` workflow `check` job to pass.
|
||||
|
||||
- [ ] **Step 3: Monitor CD job**
|
||||
|
||||
In the same actions view, the `cd` workflow should start after `ci` passes. Check the `Build and push image` step output for:
|
||||
|
||||
```
|
||||
Building gitea.d-ma.be/mathias/supervisor:<sha>
|
||||
```
|
||||
|
||||
And the `Update infra repo` step for:
|
||||
|
||||
```
|
||||
Infra repo updated: supervisor → <sha>
|
||||
```
|
||||
|
||||
- [ ] **Step 4: Verify image in Gitea registry**
|
||||
|
||||
```
|
||||
https://gitea.d-ma.be/mathias/supervisor/packages
|
||||
```
|
||||
|
||||
Should show a new tag matching the commit sha.
|
||||
|
||||
- [ ] **Step 5: Verify infra repo commit**
|
||||
|
||||
```bash
|
||||
git clone git@gitea.d-ma.be:mathias/infra.git /tmp/infra-verify
|
||||
cd /tmp/infra-verify
|
||||
git log --oneline -3
|
||||
```
|
||||
|
||||
Expected: most recent commit message is `chore(deploy): supervisor → <sha>`.
|
||||
|
||||
```bash
|
||||
grep "image:" apps/supervisor/deployment.yaml
|
||||
```
|
||||
|
||||
Expected: `image: gitea.d-ma.be/mathias/supervisor:<sha>`
|
||||
|
||||
- [ ] **Step 6: Verify Flux reconciles**
|
||||
|
||||
```bash
|
||||
flux get kustomizations
|
||||
```
|
||||
|
||||
Expected: `flux-system` shows `Ready True` and `Applied revision: main/<infra-sha>`.
|
||||
|
||||
```bash
|
||||
kubectl get pods -n supervisor
|
||||
```
|
||||
|
||||
Expected: supervisor pod is `Running` with the new image sha.
|
||||
|
||||
- [ ] **Step 7: Verify pod started correctly**
|
||||
|
||||
```bash
|
||||
kubectl logs -n supervisor deployment/supervisor --tail=20
|
||||
```
|
||||
|
||||
Expected: supervisor startup logs (MCP server listening on port 3200, no errors).
|
||||
|
||||
- [ ] **Step 8: Clean up verify clone**
|
||||
|
||||
```bash
|
||||
rm -rf /tmp/infra-verify
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Task 10: Post-deploy — registry retention policy [gitea-ui]
|
||||
|
||||
Prevent the Gitea container registry from filling up by setting a tag retention policy.
|
||||
|
||||
- [ ] **Step 1: Set tag retention in Gitea**
|
||||
|
||||
In Gitea UI: `https://gitea.d-ma.be/mathias/supervisor` → Settings → Packages → Container Registry.
|
||||
|
||||
Set: Keep last **20** tags per image name.
|
||||
|
||||
If Gitea does not expose a UI retention policy, note this for manual cleanup and open a task to automate it (e.g., a weekly Actions job that lists the container package versions through the Gitea packages API and deletes all but the newest 20).
|
||||
|
||||
- [ ] **Step 2: Verify existing test tags are cleaned up**
|
||||
|
||||
Manually delete any test tags pushed during Task 3 if not already done.
|
||||
|
||||
---
|
||||
|
||||
## Self-review checklist (for plan author — not a task)
|
||||
|
||||
- [x] **Spec coverage:** BuildKit systemd ✓, cd.yml ✓, Flux SOPS ✓, infra repo structure ✓, imagePullSecret ✓, app secrets ✓, Gitea org secrets ✓, error handling (implicit in workflow failures) ✓, registry retention ✓, smoke test ✓
|
||||
- [x] **Placeholders:** `REPLACE_WITH_YOUR_PUBLIC_KEY` and `REPLACE_WITH_REAL_KEY` are intentional — real values come from user's secrets; marked clearly
|
||||
- [x] **Type consistency:** No shared types across tasks (infra-only plan)
|
||||
- [x] **Known gaps:** `needs: [check]` assumes ci.yml job name is `check` — verified in Task 8 Step 2. The `sed` image tag patch assumes no other image line in deployment.yaml — the deployment template only has one `image:` line.
|
||||
218
docs/superpowers/specs/2026-04-20-cd-pipeline-design.md
Normal file
218
docs/superpowers/specs/2026-04-20-cd-pipeline-design.md
Normal file
@@ -0,0 +1,218 @@
|
||||
# CD Pipeline Design
|
||||
|
||||
**Date:** 2026-04-20
|
||||
**Status:** Approved for implementation
|
||||
|
||||
## Problem statement
|
||||
|
||||
The supervisor (and future services on the koala k3s cluster) have no automated deployment path after CI passes. Images are not built, the cluster is updated manually, and there is no audit trail for what is running where.
|
||||
|
||||
## Goal
|
||||
|
||||
After a push to `main` passes CI, automatically build a container image, push it to the Gitea registry, and update the cluster via GitOps — with a design that scales to many repos and services without per-repo kubeconfig or secret sprawl.
|
||||
|
||||
## Success criteria
|
||||
|
||||
- [ ] Successful `main` push triggers image build and push to `gitea.d-ma.be/<org>/<repo>:<git-sha>`
|
||||
- [ ] Infra repo receives a commit updating the image tag for the deployed service
|
||||
- [ ] Flux reconciles within 60s of the infra repo commit; pod runs the new image
|
||||
- [ ] Rollback = one commit to infra repo reverting the tag
|
||||
- [ ] Secrets (app secrets, registry pull) are SOPS-encrypted in infra repo; no manual `kubectl create secret`
|
||||
- [ ] Adding a new service requires only: adding `apps/<service>/` to infra repo + `cd.yml` to the app repo
|
||||
- [ ] Zero changes to the k3s cluster networking or runner configuration
|
||||
|
||||
## Constraints
|
||||
|
||||
- Gitea Actions self-hosted runner runs as a **systemd host process** on koala — not a k8s pod; cannot use cluster DNS
|
||||
- k3s uses containerd; no Docker daemon, no nerdctl on koala
|
||||
- Flux is already running (core controllers only); image-reflector/image-automation are NOT installed and will NOT be added
|
||||
- SOPS + age is the secret management standard; no plaintext Secrets in git
|
||||
- All org-level Gitea secrets are shared across repos — minimize the set
|
||||
|
||||
## Out of scope
|
||||
|
||||
- Multi-cluster promotion (koala only for now; infra repo structure supports adding clusters later)
|
||||
- Automated rollback on health check failure (manual rollback via infra repo commit)
|
||||
- Build caching beyond BuildKit's local disk cache
|
||||
- PR preview environments
|
||||
|
||||
---
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
App repo (supervisor, n8n, etc.)
|
||||
↓ push to main
|
||||
Gitea Actions — ci.yml (lint + test)
|
||||
↓ passes
|
||||
Gitea Actions — cd.yml
|
||||
├─ 1. buildctl → BuildKit (unix socket on koala host)
|
||||
│ → pushes gitea.d-ma.be/<org>/<repo>:<git-sha>
|
||||
├─ 2. Clone infra repo (SSH deploy key)
|
||||
│ → patch apps/<service>/deployment.yaml IMAGE_TAG → <git-sha>
|
||||
│ → git commit + push
|
||||
└─ done
|
||||
|
||||
gitea.d-ma.be/mathias/infra (Flux source)
|
||||
↓ Flux source-controller detects new commit (30s interval)
|
||||
kustomize-controller
|
||||
└─ applies apps/<service>/kustomization.yaml → k3s namespace
|
||||
↓
|
||||
pod runs new image (pulls from gitea.d-ma.be with imagePullSecret)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Components
|
||||
|
||||
### 1. BuildKit — systemd service on koala
|
||||
|
||||
BuildKit runs as a rootless systemd service on the koala host, identical to the Gitea runner pattern already in use.
|
||||
|
||||
- Socket: `unix:///run/user/<uid>/buildkit/buildkitd.sock` (rootless) or `/run/buildkit/buildkitd.sock` (root)
|
||||
- Cache: local disk at default BuildKit cache path — persists across builds
|
||||
- Access: `buildctl --addr unix:///run/buildkit/buildkitd.sock` from the runner process (same host, same user)
|
||||
- No k3s involvement for builds
|
||||
|
||||
### 2. Gitea Actions — `cd.yml`
|
||||
|
||||
Separate workflow file; triggers on `main` push after `ci.yml` succeeds.
|
||||
|
||||
```yaml
|
||||
name: cd
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
|
||||
jobs:
|
||||
deploy:
|
||||
needs: [ci] # or workflow_run trigger — see implementation plan
|
||||
runs-on: [self-hosted, koala]
|
||||
env:
|
||||
IMAGE: gitea.d-ma.be/${{ github.repository }}:${{ github.sha }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Build and push
|
||||
run: |
|
||||
buildctl --addr unix:///run/buildkit/buildkitd.sock \
|
||||
build \
|
||||
--frontend dockerfile.v0 \
|
||||
--local context=. \
|
||||
--local dockerfile=. \
|
||||
--output type=image,name=$IMAGE,push=true
|
||||
env:
|
||||
BUILDKIT_HOST: unix:///run/buildkit/buildkitd.sock
|
||||
- name: Update infra repo
|
||||
run: |
|
||||
git clone git@gitea.d-ma.be:mathias/infra.git /tmp/infra
|
||||
cd /tmp/infra
|
||||
sed -i "s|IMAGE_TAG|${{ github.sha }}|g" apps/${{ env.SERVICE_NAME }}/deployment.yaml
|
||||
git config user.email "cd-bot@d-ma.be"
|
||||
git config user.name "CD Bot"
|
||||
git add apps/${{ env.SERVICE_NAME }}/deployment.yaml
|
||||
git commit -m "chore(deploy): ${{ env.SERVICE_NAME }} → ${{ github.sha }}"
|
||||
git push
|
||||
env:
|
||||
GIT_SSH_COMMAND: ssh -i /tmp/infra-deploy-key -o StrictHostKeyChecking=no
|
||||
```
|
||||
|
||||
`SERVICE_NAME` is set per-repo (either hardcoded in `cd.yml` or derived from the repo name).
|
||||
|
||||
### 3. Org-level Gitea secrets
|
||||
|
||||
Three secrets, set once, inherited by all repos:
|
||||
|
||||
| Secret | Purpose |
|
||||
|--------|---------|
|
||||
| `BUILDKIT_REGISTRY_AUTH` | credentials for pushing to `gitea.d-ma.be` (buildctl `--opt` or `~/.docker/config.json`) |
|
||||
| `INFRA_DEPLOY_KEY` | SSH private key with write access to `gitea.d-ma.be/mathias/infra` |
|
||||
| `KUBECONFIG_KOALA` | (optional) kubeconfig for manual `kubectl` steps if ever needed; scoped ServiceAccount |
|
||||
|
||||
### 4. Infra repo structure
|
||||
|
||||
```
|
||||
gitea.d-ma.be/mathias/infra
|
||||
├── clusters/
|
||||
│ └── koala/
|
||||
│ └── kustomization.yaml # points at ../../apps/*/
|
||||
├── apps/
|
||||
│ ├── supervisor/
|
||||
│ │ ├── namespace.yaml
|
||||
│ │ ├── deployment.yaml # image: gitea.d-ma.be/mathias/supervisor:IMAGE_TAG
|
||||
│ │ ├── service.yaml
|
||||
│ │ ├── secrets.enc.yaml # SOPS-encrypted app secrets (ANTHROPIC_API_KEY, etc.)
|
||||
│ │ └── kustomization.yaml
|
||||
│ ├── n8n/
|
||||
│ │ └── ...
|
||||
│ └── imagepullsecret/
|
||||
│ └── secret.enc.yaml # SOPS-encrypted imagePullSecret for gitea.d-ma.be
|
||||
└── flux-system/ # existing Flux bootstrap manifests
|
||||
```
|
||||
|
||||
Adding a new service = add `apps/<service>/` directory and list it in `clusters/koala/kustomization.yaml`. Kustomize `resources` does not expand globs, so each app directory must be listed explicitly.
|
||||
|
||||
### 5. SOPS + age for Flux
|
||||
|
||||
Flux decrypts SOPS-encrypted files at apply time using an age key stored as a k8s Secret in the `flux-system` namespace. Setup:
|
||||
|
||||
1. Generate age keypair: `age-keygen`
|
||||
2. Store private key: `kubectl create secret generic sops-age --from-file=age.agekey -n flux-system`
|
||||
3. Configure Flux Kustomization with `decryption.provider: sops`
|
||||
4. Encrypt secrets before committing: `sops --encrypt --age <pubkey> secret.yaml > secret.enc.yaml`
|
||||
|
||||
App secrets (e.g., `ANTHROPIC_API_KEY`) and the registry pull secret live as encrypted files in `apps/<service>/` and `apps/imagepullsecret/` respectively.
|
||||
|
||||
### 6. Image pull secret
|
||||
|
||||
Each app namespace needs a `kubernetes.io/dockerconfigjson` Secret to pull from `gitea.d-ma.be`. This Secret is SOPS-encrypted in `apps/imagepullsecret/` and applied to each app namespace via Kustomize `namespace` field or a shared Kustomize component.
|
||||
|
||||
---
|
||||
|
||||
## Data flow: supervisor deploy
|
||||
|
||||
1. Push to `supervisor` main → CI passes (lint/test/vet)
|
||||
2. CD job builds image: `gitea.d-ma.be/mathias/supervisor:abc1234`
|
||||
3. CD job clones infra repo, patches `apps/supervisor/deployment.yaml`, commits
|
||||
4. Flux source-controller detects infra commit within 30s
|
||||
5. kustomize-controller applies `apps/supervisor/kustomization.yaml`
|
||||
6. Flux decrypts `secrets.enc.yaml` → k8s Secret in `supervisor` namespace
|
||||
7. k3s pulls `gitea.d-ma.be/mathias/supervisor:abc1234` using imagePullSecret
|
||||
8. Pod starts with new image; previous pod terminates
|
||||
|
||||
Rollback: `git revert <tag-commit>` in infra repo → Flux reconciles → old image deployed.
|
||||
|
||||
---
|
||||
|
||||
## Error handling
|
||||
|
||||
| Scenario | Behaviour |
|
||||
|----------|-----------|
|
||||
| CI fails | `cd.yml` does not run (`needs: ci` gate) |
|
||||
| BuildKit unreachable | `buildctl` exits non-zero → workflow fails; infra repo untouched |
|
||||
| Image push fails | Workflow fails; infra repo untouched; cluster unchanged |
|
||||
| Infra repo push conflict | Retry once with rebase; fail and alert if still conflicting |
|
||||
| Flux reconcile error | Notification-controller fires alert; pods stay on previous image |
|
||||
| Pod image pull fails | `ImagePullBackOff`; Flux reports degraded Kustomization |
|
||||
| SOPS decrypt fails | Kustomization fails; Flux reports error; no partial apply |
|
||||
|
||||
---
|
||||
|
||||
## Testing approach
|
||||
|
||||
1. **BuildKit smoke test** — `buildctl build` with a trivial one-line Dockerfile; verify image appears in Gitea registry
|
||||
2. **cd.yml dry run** — trigger manually on a test branch; verify infra repo commit contains correct sha
|
||||
3. **Flux reconcile test** — push infra commit; verify `flux get kustomizations` shows `Ready` and pod runs new image sha
|
||||
4. **Pull secret test** — delete pod, verify it restarts and pulls from Gitea registry without `ImagePullBackOff`
|
||||
5. **SOPS round-trip test** — encrypt a dummy secret, push to infra repo, verify Flux decrypts and `kubectl get secret` shows correct data
|
||||
|
||||
---
|
||||
|
||||
## Risks
|
||||
|
||||
| Risk | Mitigation |
|
||||
|------|------------|
|
||||
| BuildKit socket path varies by user/rootless mode | Confirm path during setup; hardcode in `cd.yml` |
|
||||
| Infra repo concurrent pushes (multiple repos deploying simultaneously) | Git rebase retry handles this; unlikely at current scale |
|
||||
| age private key lost | Back up to SOPS-accessible location; document recovery procedure |
|
||||
| Registry storage fills up | Set Gitea registry tag retention policy (keep last 20 per repo) |
|
||||
| Gitea deploy key compromised | Rotate via Gitea UI; single key for infra repo only |
|
||||
322
docs/superpowers/specs/2026-04-20-model-orchestration-design.md
Normal file
322
docs/superpowers/specs/2026-04-20-model-orchestration-design.md
Normal file
@@ -0,0 +1,322 @@
|
||||
# Model Orchestration Design
|
||||
|
||||
**Date:** 2026-04-20
|
||||
**Status:** Approved for implementation
|
||||
|
||||
## Problem statement
|
||||
|
||||
The hyperguild supervisor currently spawns a `claude --print` subprocess for every skill call. The model routing config (`models.yaml`) exists but is dead weight — the model name is injected as text into the task prompt and ignored. Every skill call costs Claude tokens regardless of task complexity or data sensitivity.
|
||||
|
||||
## Goal
|
||||
|
||||
Route skill work to the most appropriate model — weighing cost, latency, and quality — with Claude acting as the real supervisor: verifying outputs and deciding when to escalate. Local models on owned hardware handle the common case; Claude escalates through a chain to frontier models only when local quality is insufficient.
|
||||
|
||||
## Success criteria
|
||||
|
||||
- [ ] Each skill dispatches generation to its configured local model via LiteLLM by default
|
||||
- [ ] Claude verifies every local output and either accepts or escalates
|
||||
- [ ] Escalation walks a per-skill chain (local small → local large → Sonnet → Opus) with one attempt per tier
|
||||
- [ ] Every attempt (model, tier, duration, warm state, verdict) is logged in the session JSONL
|
||||
- [ ] Cloud tiers (Sonnet/Opus) self-certify — no separate verifier call
|
||||
- [ ] Zero changes to skill handlers — they call `ExecutorFn` exactly as today
|
||||
- [ ] `LiteLLMBaseURL` already in config; no new env vars required beyond `LLAMA_SWAP_URL`
|
||||
|
||||
## Constraints
|
||||
|
||||
- One attempt per tier before escalating (no retry within a tier)
|
||||
- Anthropic T&C: Claude is called normally via Anthropic API; local models are called directly via LiteLLM HTTP — no API redirection
|
||||
- `models.yaml` remains the single routing config file
|
||||
|
||||
## Out of scope
|
||||
|
||||
- Auto-rerouting based on real-time warm state (logged, not acted on — Phase 4)
|
||||
- Multi-tenant / public service exposure
|
||||
- RAG/CAG model boosting
|
||||
- Managed Agent cloud delegation (chain stub only in Phase 3)
|
||||
|
||||
---
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
MCP tool call (Claude Code)
|
||||
↓
|
||||
Skill handler — calls ExecutorFn (unchanged)
|
||||
↓
|
||||
Orchestrator.Run (implements ExecutorFn)
|
||||
├─ Resolve chain from models.yaml
|
||||
├─ For each model in chain:
|
||||
│ ├─ [ollama/*] → LiteLLM executor → generate
|
||||
│ │ ↓
|
||||
│ │ Claude verifier (task + output + discipline)
|
||||
│ │ ├─ accept → return Result (log attempt)
|
||||
│ │ └─ escalate → next tier (log attempt)
|
||||
│ │
|
||||
│ └─ [claude-*] → Claude executor (current) → generate + self-certify
|
||||
│ └─ return Result (log attempt)
|
||||
│
|
||||
└─ All tiers exhausted → return best attempt with escalation note
|
||||
```
|
||||
|
||||
Claude is always the verifier for local tiers. At cloud tiers, Claude generates and self-certifies — the verifier call is skipped.
|
||||
|
||||
---
|
||||
|
||||
## Components
|
||||
|
||||
### 1. `internal/exec/litellm.go` — LiteLLM executor
|
||||
|
||||
Calls `POST /v1/chat/completions` on the configured LiteLLM server. Implements the same `ExecutorFn` signature as the existing claude executor.
|
||||
|
||||
```go
|
||||
type LiteLLMExecutor struct {
|
||||
BaseURL string
|
||||
APIKey string
|
||||
HTTPClient *http.Client
|
||||
Timeout time.Duration
|
||||
}
|
||||
|
||||
func NewLiteLLM(baseURL, apiKey string, timeout time.Duration) *LiteLLMExecutor
|
||||
|
||||
func (e *LiteLLMExecutor) Run(ctx context.Context, req Request) (Result, error)
|
||||
```
|
||||
|
||||
Request mapping:
|
||||
- `req.SkillPrompt` → system message
|
||||
- `req.TaskPrompt` → user message
|
||||
- `req.Model` → `model` field in the chat completions request
|
||||
|
||||
Response handling: local models are prompted (via the discipline file output contract) to return a JSON object matching the `Result` schema. The executor attempts `json.Unmarshal` into `Result` directly — no envelope unwrapping needed (unlike the `--output-format json` claude envelope). If unmarshalling fails, the executor returns an error that the orchestrator treats as an automatic escalation trigger.
|
||||
|
||||
### 2. `internal/exec/verifier.go` — Claude verifier
|
||||
|
||||
A focused Claude call that judges local model output. Uses the existing `Executor` (claude subprocess) internally.
|
||||
|
||||
```go
|
||||
type Verdict struct {
|
||||
Accept bool `json:"accept"`
|
||||
Feedback string `json:"feedback"` // reason if not accepting; empty if accept
|
||||
}
|
||||
|
||||
type Verifier struct {
|
||||
executor *Executor // the existing claude executor
|
||||
}
|
||||
|
||||
func NewVerifier(executor *Executor) *Verifier
|
||||
|
||||
func (v *Verifier) Verify(ctx context.Context, skillPrompt, taskPrompt string, output Result) (Verdict, error)
|
||||
```
|
||||
|
||||
The verifier prompt gives Claude:
|
||||
1. The skill discipline file (so it knows the iron laws and output contract)
|
||||
2. The original task prompt (informed verification — Claude sees what was asked)
|
||||
3. The generated output
|
||||
4. A short instruction: "Does this output satisfy the discipline's iron laws and output contract? Reply with JSON: `{"accept": true|false, "feedback": "..."}`"
|
||||
|
||||
The verifier uses a lightweight JSON schema for its own output (a `Verdict` schema), keeping the call fast.
|
||||
|
||||
### 3. `internal/exec/orchestrator.go` — chain walker
|
||||
|
||||
Implements `ExecutorFn`. Walks the escalation chain, delegating generation and verification per tier.
|
||||
|
||||
```go
|
||||
type Chain []ChainEntry
|
||||
|
||||
type ChainEntry struct {
|
||||
Model string // e.g. "ollama/phi4", "claude-sonnet-4-5"
|
||||
Tier string // "local" | "subagent" | "managed"
|
||||
IsCloud bool // true for claude-* models; skips verifier
|
||||
}
|
||||
|
||||
type Orchestrator struct {
|
||||
chain Chain
|
||||
litellm *LiteLLMExecutor
|
||||
claude *Executor
|
||||
verifier *Verifier
|
||||
llamaSwapURL string // for warm-state probe
|
||||
}
|
||||
|
||||
func NewOrchestrator(chain Chain, litellm *LiteLLMExecutor, claude *Executor, verifier *Verifier, llamaSwapURL string) *Orchestrator
|
||||
|
||||
func (o *Orchestrator) Run(ctx context.Context, req Request) (Result, error)
|
||||
```
|
||||
|
||||
Algorithm:
|
||||
```
|
||||
for each entry in chain:
|
||||
warm = probe llama-swap (if local tier)
|
||||
start = now()
|
||||
if entry.IsCloud:
|
||||
result, err = claude.Run(ctx, req with entry.Model)
|
||||
log attempt(model, tier, duration, warm, verified=true)
|
||||
if err == nil: return result
|
||||
else:
|
||||
result, err = litellm.Run(ctx, req with entry.Model)
|
||||
duration = now() - start
|
||||
if err != nil:
|
||||
log attempt(model, tier, duration, warm, verified=false)
|
||||
continue // automatic escalation on parse/network error
|
||||
verdict = verifier.Verify(ctx, req.SkillPrompt, req.TaskPrompt, result)
|
||||
log attempt(model, tier, duration, warm, verified=verdict.Accept)
|
||||
if verdict.Accept: return result
|
||||
// inject verifier feedback into next tier's task prompt
|
||||
req.TaskPrompt = req.TaskPrompt + "\n\nPrior attempt feedback: " + verdict.Feedback
|
||||
|
||||
return error("all tiers exhausted")
|
||||
```
|
||||
|
||||
### 4. `internal/config/models.go` — chain parser
|
||||
|
||||
Replaces the current single-model resolution with chain parsing.
|
||||
|
||||
Updated `models.yaml` format:
|
||||
|
||||
```yaml
|
||||
verifier: claude-sonnet-4-6 # fixed verifier for all local tiers
|
||||
|
||||
llama_swap_url: http://koala:8080 # for warm-state probing
|
||||
|
||||
default_chain:
|
||||
- ollama/qwen3-coder-30b-tuned
|
||||
- claude-sonnet-4-5
|
||||
|
||||
skills:
|
||||
tdd:
|
||||
chain:
|
||||
- ollama/qwen3-coder-30b-tuned
|
||||
- claude-sonnet-4-5
|
||||
review:
|
||||
chain:
|
||||
- ollama/devstral-tuned
|
||||
- ollama/gemma4
|
||||
- claude-sonnet-4-5
|
||||
debug:
|
||||
chain:
|
||||
- ollama/deepseek-r1-tuned
|
||||
- claude-sonnet-4-5
|
||||
spec:
|
||||
chain:
|
||||
- ollama/phi4
|
||||
- ollama/gemma4
|
||||
- claude-sonnet-4-5
|
||||
- claude-opus-4-6
|
||||
retrospective:
|
||||
chain:
|
||||
- ollama/qwen3-coder-30b-tuned
|
||||
- claude-sonnet-4-5
|
||||
trainer:
|
||||
chain:
|
||||
- ollama/qwen3-coder-30b-tuned
|
||||
- claude-sonnet-4-5
|
||||
```
|
||||
|
||||
The parser exposes:
|
||||
```go
|
||||
func (m *Models) ChainFor(skill string) Chain
|
||||
func (m *Models) Verifier() string
|
||||
func (m *Models) LlamaSwapURL() string
|
||||
```
|
||||
|
||||
Caller override (`model` param in MCP tool call) pins the chain to a single entry — one model, no escalation. This preserves the existing override behaviour for power users.
|
||||
|
||||
### 5. `internal/session/session.go` — updated `Attempt` struct
|
||||
|
||||
```go
|
||||
type Attempt struct {
|
||||
Attempt int `json:"attempt"`
|
||||
Model string `json:"model"`
|
||||
Tier string `json:"tier"` // local | subagent | managed
|
||||
DurationMs int64 `json:"duration_ms"`
|
||||
WarmStart bool `json:"warm_start"` // model was already loaded in llama-swap
|
||||
Verified bool `json:"verified"`
|
||||
Verdict string `json:"verdict,omitempty"` // accept | escalate | error
|
||||
Feedback string `json:"feedback,omitempty"` // verifier feedback on escalation
|
||||
OutputSummary string `json:"output_summary,omitempty"`
|
||||
RunnerOutput string `json:"runner_output,omitempty"`
|
||||
}
|
||||
```
|
||||
|
||||
### 6. `cmd/supervisor/main.go` — one wiring change
|
||||
|
||||
```go
|
||||
// Before:
|
||||
reg.Register(review.New(review.Config{ExecutorFn: executor.Run, ...}))
|
||||
|
||||
// After:
|
||||
chain := models.ChainFor("review")
|
||||
orch := exec.NewOrchestrator(chain, litellmExec, claudeExec, verifier, models.LlamaSwapURL())
|
||||
reg.Register(review.New(review.Config{ExecutorFn: orch.Run, ...}))
|
||||
```
|
||||
|
||||
One orchestrator per skill, sharing the same `litellmExec`, `claudeExec`, and `verifier` instances.
|
||||
|
||||
---
|
||||
|
||||
## Data flow example: `review` skill call
|
||||
|
||||
1. Claude Code calls `review` tool with `files: ["internal/foo.go"]`
|
||||
2. Skill handler builds task prompt, calls `orch.Run`
|
||||
3. Orchestrator resolves chain: `[devstral, gemma4, sonnet]`
|
||||
4. Probes llama-swap: devstral is warm
|
||||
5. LiteLLM calls devstral → returns JSON result
|
||||
6. Verifier asks Claude: "does this review satisfy the iron laws?"
|
||||
7. Claude: `{"accept": false, "feedback": "missing line references for all findings"}`
|
||||
8. Orchestrator logs attempt #1 (devstral, local, 4200ms, warm, escalate)
|
||||
9. Injects feedback into task prompt, calls gemma4
|
||||
10. Verifier: `{"accept": true}`
|
||||
11. Orchestrator logs attempt #2 (gemma4, local, 6100ms, cold, accept)
|
||||
12. Returns result to skill handler → MCP response
|
||||
|
||||
Session JSONL records both attempts. You can see: devstral was warm but produced weak output; gemma4 was cold but passed.
|
||||
|
||||
---
|
||||
|
||||
## Observability
|
||||
|
||||
Session JSONL is the primary store. Each `Entry.Attempts` slice records the full escalation trail. To analyse across sessions:
|
||||
|
||||
```bash
|
||||
# Which models are escalating most?
|
||||
jq -r '.attempts[] | select(.verdict == "escalate") | .model' brain/sessions/*.jsonl | sort | uniq -c
|
||||
|
||||
# Average latency per model
|
||||
jq -r '.attempts[] | [.model, .duration_ms] | @tsv' brain/sessions/*.jsonl | awk '{sum[$1]+=$2; n[$1]++} END {for (m in sum) print m, sum[m]/n[m]}'
|
||||
|
||||
# Cold start frequency
|
||||
jq -r '.attempts[] | select(.warm_start == false) | .model' brain/sessions/*.jsonl | sort | uniq -c
|
||||
```
|
||||
|
||||
No new metrics infrastructure needed for Phase 3. Phase 4 can build a dashboard on top of this data.
|
||||
|
||||
---
|
||||
|
||||
## Error handling
|
||||
|
||||
| Scenario | Behaviour |
|
||||
|----------|-----------|
|
||||
| LiteLLM unreachable | Log attempt as error, escalate immediately |
|
||||
| Local model returns unparseable JSON | Log attempt as error, escalate |
|
||||
| Verifier call fails | Log, treat as escalate (safe default) |
|
||||
| All tiers exhausted | Return error to skill handler; skill returns MCP error to caller |
|
||||
| Caller passes `model` override | Single-entry chain, no escalation, no verifier call |
|
||||
|
||||
---
|
||||
|
||||
## Testing approach
|
||||
|
||||
- `TestLiteLLMExecutor`: mock HTTP server returning valid/invalid JSON; verify parse logic and error escalation
|
||||
- `TestVerifier`: fake claude executor returning accept/escalate verdicts; verify prompt construction
|
||||
- `TestOrchestrator`: table-driven — chains of 1/2/3 tiers, various accept/escalate/error combinations; verify attempt log contents and final result
|
||||
- `TestModelsChainFor`: YAML parsing for all skill overrides and default_chain fallback
|
||||
- Integration smoke test: start real LiteLLM (or mock), call `review` tool via MCP, verify attempt log written
|
||||
|
||||
---
|
||||
|
||||
## Risks
|
||||
|
||||
| Risk | Mitigation |
|
||||
|------|------------|
|
||||
| Local models ignore output contract → bad JSON | Discipline files already specify JSON output contract; parse failure auto-escalates |
|
||||
| Verifier Claude call adds latency to every local attempt | Verifier prompt is small and fast; acceptable tradeoff for quality gate |
|
||||
| llama-swap warm probe adds overhead | Probe is a single lightweight HTTP GET; timeout at 200ms, treat failure as `warm_start: false` |
|
||||
| Chain exhaustion leaves caller with no result | Return structured error via MCP; caller can retry with explicit `model` override |
|
||||
@@ -7,9 +7,15 @@ import (
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
type skillChain struct {
|
||||
Chain []string `yaml:"chain"`
|
||||
}
|
||||
|
||||
type modelsFile struct {
|
||||
Default string `yaml:"default"`
|
||||
Skills map[string]string `yaml:"skills"`
|
||||
Verifier string `yaml:"verifier"`
|
||||
LlamaSwapURL string `yaml:"llama_swap_url"`
|
||||
DefaultChain []string `yaml:"default_chain"`
|
||||
Skills map[string]skillChain `yaml:"skills"`
|
||||
}
|
||||
|
||||
type Models struct {
|
||||
@@ -28,16 +34,23 @@ func LoadModels(path string) (Models, error) {
|
||||
return Models{data: f}, nil
|
||||
}
|
||||
|
||||
// Resolve returns the model for a skill, respecting three-layer priority:
|
||||
// 1. override (from MCP call) — highest
|
||||
// 2. per-skill default from models.yaml
|
||||
// 3. global default
|
||||
func (m Models) Resolve(skill, override string) string {
|
||||
// Verifier returns the model name to use for all local-tier output verification.
|
||||
func (m Models) Verifier() string { return m.data.Verifier }
|
||||
|
||||
// LlamaSwapURL returns the llama-swap base URL for warm-state probing.
|
||||
func (m Models) LlamaSwapURL() string { return m.data.LlamaSwapURL }
|
||||
|
||||
// ChainFor returns the ordered list of model names for a skill.
|
||||
// If override is non-empty, returns a single-entry chain (no escalation).
|
||||
// Falls back to default_chain when the skill has no explicit entry.
|
||||
func (m Models) ChainFor(skill, override string) []string {
|
||||
if override != "" {
|
||||
return override
|
||||
return []string{override}
|
||||
}
|
||||
if model, ok := m.data.Skills[skill]; ok {
|
||||
return model
|
||||
if sc, ok := m.data.Skills[skill]; ok && len(sc.Chain) > 0 {
|
||||
return sc.Chain
|
||||
}
|
||||
return m.data.Default
|
||||
out := make([]string, len(m.data.DefaultChain))
|
||||
copy(out, m.data.DefaultChain)
|
||||
return out
|
||||
}
|
||||
|
||||
@@ -10,35 +10,71 @@ import (
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestModelsResolve(t *testing.T) {
|
||||
yaml := `
|
||||
default: ollama/default-model
|
||||
const testYAML = `
|
||||
verifier: claude-sonnet-4-6
|
||||
llama_swap_url: http://koala:8080
|
||||
|
||||
default_chain:
|
||||
- ollama/qwen3-coder-30b-tuned
|
||||
- claude-sonnet-4-6
|
||||
|
||||
skills:
|
||||
tdd: ollama/qwen3-coder-30b-tuned
|
||||
review: ollama/devstral-tuned
|
||||
review:
|
||||
chain:
|
||||
- ollama/devstral-tuned
|
||||
- ollama/gemma4
|
||||
- claude-sonnet-4-6
|
||||
spec:
|
||||
chain:
|
||||
- ollama/phi4
|
||||
- claude-opus-4-6
|
||||
`
|
||||
|
||||
func writeModels(t *testing.T, content string) string {
|
||||
t.Helper()
|
||||
f := filepath.Join(t.TempDir(), "models.yaml")
|
||||
require.NoError(t, os.WriteFile(f, []byte(yaml), 0644))
|
||||
|
||||
m, err := config.LoadModels(f)
|
||||
require.NoError(t, err)
|
||||
|
||||
assert.Equal(t, "ollama/qwen3-coder-30b-tuned", m.Resolve("tdd", ""))
|
||||
assert.Equal(t, "ollama/devstral-tuned", m.Resolve("review", ""))
|
||||
assert.Equal(t, "ollama/default-model", m.Resolve("unknown", ""))
|
||||
require.NoError(t, os.WriteFile(f, []byte(content), 0644))
|
||||
return f
|
||||
}
|
||||
|
||||
func TestModelsOverride(t *testing.T) {
|
||||
yaml := `
|
||||
default: ollama/default-model
|
||||
skills:
|
||||
tdd: ollama/qwen3-coder-30b-tuned
|
||||
`
|
||||
f := filepath.Join(t.TempDir(), "models.yaml")
|
||||
require.NoError(t, os.WriteFile(f, []byte(yaml), 0644))
|
||||
func TestModelsVerifier(t *testing.T) {
|
||||
m, err := config.LoadModels(writeModels(t, testYAML))
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, "claude-sonnet-4-6", m.Verifier())
|
||||
}
|
||||
|
||||
m, err := config.LoadModels(f)
|
||||
func TestModelsLlamaSwapURL(t *testing.T) {
|
||||
m, err := config.LoadModels(writeModels(t, testYAML))
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, "http://koala:8080", m.LlamaSwapURL())
|
||||
}
|
||||
|
||||
func TestModelsChainForSkillOverride(t *testing.T) {
|
||||
m, err := config.LoadModels(writeModels(t, testYAML))
|
||||
require.NoError(t, err)
|
||||
|
||||
assert.Equal(t, "anthropic/claude-sonnet-4-6", m.Resolve("tdd", "anthropic/claude-sonnet-4-6"))
|
||||
chain := m.ChainFor("review", "")
|
||||
require.Len(t, chain, 3)
|
||||
assert.Equal(t, "ollama/devstral-tuned", chain[0])
|
||||
assert.Equal(t, "ollama/gemma4", chain[1])
|
||||
assert.Equal(t, "claude-sonnet-4-6", chain[2])
|
||||
}
|
||||
|
||||
func TestModelsChainForDefaultFallback(t *testing.T) {
|
||||
m, err := config.LoadModels(writeModels(t, testYAML))
|
||||
require.NoError(t, err)
|
||||
|
||||
chain := m.ChainFor("trainer", "") // not in skills map
|
||||
require.Len(t, chain, 2)
|
||||
assert.Equal(t, "ollama/qwen3-coder-30b-tuned", chain[0])
|
||||
assert.Equal(t, "claude-sonnet-4-6", chain[1])
|
||||
}
|
||||
|
||||
func TestModelsChainForCallerOverride(t *testing.T) {
|
||||
m, err := config.LoadModels(writeModels(t, testYAML))
|
||||
require.NoError(t, err)
|
||||
|
||||
chain := m.ChainFor("review", "claude-opus-4-6")
|
||||
require.Len(t, chain, 1)
|
||||
assert.Equal(t, "claude-opus-4-6", chain[0])
|
||||
}
|
||||
|
||||
@@ -72,8 +72,11 @@ func (e *Executor) Run(ctx context.Context, req Request) (Result, error) {
|
||||
"--tools", tools,
|
||||
"--json-schema", Schema,
|
||||
"--output-format", "json",
|
||||
prompt,
|
||||
}
|
||||
if strings.HasPrefix(req.Model, "claude-") {
|
||||
args = append(args, "--model", req.Model)
|
||||
}
|
||||
args = append(args, prompt)
|
||||
|
||||
cmd := exec.CommandContext(ctx, e.cfg.ClaudeBinary, args...)
|
||||
cmd.Env = append(os.Environ(), "LITELLM_API_KEY="+e.cfg.LiteLLMAPIKey)
|
||||
|
||||
@@ -75,3 +75,58 @@ func TestExecutorTimesOut(t *testing.T) {
|
||||
_, err := ex.Run(context.Background(), iexec.Request{TaskPrompt: "slow"})
|
||||
assert.ErrorContains(t, err, "timeout")
|
||||
}
|
||||
|
||||
func TestExecutorPassesModelFlagForCloudModel(t *testing.T) {
|
||||
// The script captures its args to a temp file so we can assert --model was passed.
|
||||
argsFile := filepath.Join(t.TempDir(), "args.txt")
|
||||
envelope := `{"type":"result","subtype":"success","is_error":false,"structured_output":{"status":"pass","phase":"review","skill":"review","file_path":"","runner_output":"","verified":true,"model_used":"claude-sonnet-4-6","message":"ok"}}`
|
||||
|
||||
dir := t.TempDir()
|
||||
script := filepath.Join(dir, "claude")
|
||||
content := "#!/bin/sh\necho \"$@\" > " + argsFile + "\necho '" + envelope + "'\n"
|
||||
require.NoError(t, os.WriteFile(script, []byte(content), 0755))
|
||||
|
||||
ex := iexec.New(iexec.Config{
|
||||
ClaudeBinary: script,
|
||||
SystemPrompt: "sys",
|
||||
Timeout: 5 * time.Second,
|
||||
})
|
||||
|
||||
_, err := ex.Run(context.Background(), iexec.Request{
|
||||
SkillPrompt: "review rules",
|
||||
TaskPrompt: "do review",
|
||||
Model: "claude-sonnet-4-6",
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
argsData, err := os.ReadFile(argsFile)
|
||||
require.NoError(t, err)
|
||||
assert.Contains(t, string(argsData), "--model claude-sonnet-4-6")
|
||||
}
|
||||
|
||||
func TestExecutorSkipsModelFlagForLocalModel(t *testing.T) {
|
||||
argsFile := filepath.Join(t.TempDir(), "args.txt")
|
||||
envelope := `{"type":"result","subtype":"success","is_error":false,"structured_output":{"status":"pass","phase":"review","skill":"review","file_path":"","runner_output":"","verified":true,"model_used":"ollama/devstral","message":"ok"}}`
|
||||
|
||||
dir := t.TempDir()
|
||||
script := filepath.Join(dir, "claude")
|
||||
content := "#!/bin/sh\necho \"$@\" > " + argsFile + "\necho '" + envelope + "'\n"
|
||||
require.NoError(t, os.WriteFile(script, []byte(content), 0755))
|
||||
|
||||
ex := iexec.New(iexec.Config{
|
||||
ClaudeBinary: script,
|
||||
SystemPrompt: "sys",
|
||||
Timeout: 5 * time.Second,
|
||||
})
|
||||
|
||||
_, err := ex.Run(context.Background(), iexec.Request{
|
||||
SkillPrompt: "review rules",
|
||||
TaskPrompt: "do review",
|
||||
Model: "ollama/devstral",
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
argsData, err := os.ReadFile(argsFile)
|
||||
require.NoError(t, err)
|
||||
assert.NotContains(t, string(argsData), "--model")
|
||||
}
|
||||
|
||||
103
internal/exec/litellm.go
Normal file
103
internal/exec/litellm.go
Normal file
@@ -0,0 +1,103 @@
|
||||
package exec
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"time"
|
||||
)
|
||||
|
||||
// LiteLLMExecutor is a client for a LiteLLM-compatible
// /v1/chat/completions endpoint. The model's reply content is expected to be
// a bare JSON object matching the Result schema, with no surrounding envelope.
type LiteLLMExecutor struct {
	baseURL    string       // server root; the completions path is appended per call
	apiKey     string       // sent as a Bearer token when non-empty
	httpClient *http.Client // carries the per-call round-trip timeout
}

// NewLiteLLM returns an executor that talks to the server at baseURL.
// apiKey may be empty. timeout bounds each complete HTTP round-trip.
func NewLiteLLM(baseURL, apiKey string, timeout time.Duration) *LiteLLMExecutor {
	client := &http.Client{Timeout: timeout}

	ex := new(LiteLLMExecutor)
	ex.baseURL = baseURL
	ex.apiKey = apiKey
	ex.httpClient = client
	return ex
}
|
||||
|
||||
// Wire types for the chat-completions exchange. Only the fields this package
// sends or reads are declared.
type (
	// litellmMessage is one chat turn: a role and its text content.
	litellmMessage struct {
		Role    string `json:"role"`
		Content string `json:"content"`
	}

	// litellmRequest is the JSON body posted to the completions endpoint.
	litellmRequest struct {
		Model    string           `json:"model"`
		Messages []litellmMessage `json:"messages"`
	}

	// litellmChoice is one candidate completion in the response.
	litellmChoice struct {
		Message litellmMessage `json:"message"`
	}

	// litellmResponse is the decoded response body.
	litellmResponse struct {
		Choices []litellmChoice `json:"choices"`
	}
)
|
||||
|
||||
// Run dispatches req to the LiteLLM server and parses the Result from the
|
||||
// assistant message content. Returns an error on network failure, non-200
|
||||
// status, or unparseable/invalid JSON — all of which the Orchestrator treats
|
||||
// as automatic escalation triggers.
|
||||
func (e *LiteLLMExecutor) Run(ctx context.Context, req Request) (Result, error) {
|
||||
body := litellmRequest{
|
||||
Model: req.Model,
|
||||
Messages: []litellmMessage{
|
||||
{Role: "system", Content: req.SkillPrompt},
|
||||
{Role: "user", Content: req.TaskPrompt},
|
||||
},
|
||||
}
|
||||
|
||||
bodyBytes, err := json.Marshal(body)
|
||||
if err != nil {
|
||||
return Result{}, fmt.Errorf("litellm: marshal request: %w", err)
|
||||
}
|
||||
|
||||
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, e.baseURL+"/v1/chat/completions", bytes.NewReader(bodyBytes))
|
||||
if err != nil {
|
||||
return Result{}, fmt.Errorf("litellm: create request: %w", err)
|
||||
}
|
||||
httpReq.Header.Set("Content-Type", "application/json")
|
||||
if e.apiKey != "" {
|
||||
httpReq.Header.Set("Authorization", "Bearer "+e.apiKey)
|
||||
}
|
||||
|
||||
resp, err := e.httpClient.Do(httpReq)
|
||||
if err != nil {
|
||||
return Result{}, fmt.Errorf("litellm: request failed: %w", err)
|
||||
}
|
||||
defer resp.Body.Close() //nolint:errcheck
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return Result{}, fmt.Errorf("litellm: server returned status %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
var chatResp litellmResponse
|
||||
if err := json.NewDecoder(resp.Body).Decode(&chatResp); err != nil {
|
||||
return Result{}, fmt.Errorf("litellm: decode response: %w", err)
|
||||
}
|
||||
if len(chatResp.Choices) == 0 {
|
||||
return Result{}, fmt.Errorf("litellm: no choices in response")
|
||||
}
|
||||
|
||||
content := chatResp.Choices[0].Message.Content
|
||||
var result Result
|
||||
if err := json.Unmarshal([]byte(content), &result); err != nil {
|
||||
return Result{}, fmt.Errorf("litellm: parse result JSON: %w — content: %s", err, content)
|
||||
}
|
||||
if err := result.Validate(); err != nil {
|
||||
return Result{}, fmt.Errorf("litellm: invalid result: %w", err)
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
112
internal/exec/litellm_test.go
Normal file
112
internal/exec/litellm_test.go
Normal file
@@ -0,0 +1,112 @@
|
||||
package exec_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func validLiteLLMResult() iexec.Result {
|
||||
return iexec.Result{
|
||||
Status: "pass",
|
||||
Phase: "review",
|
||||
Skill: "review",
|
||||
ModelUsed: "ollama/devstral",
|
||||
Message: "looks good",
|
||||
}
|
||||
}
|
||||
|
||||
func chatResponseFor(t *testing.T, result iexec.Result) []byte {
|
||||
t.Helper()
|
||||
content, err := json.Marshal(result)
|
||||
require.NoError(t, err)
|
||||
resp := map[string]any{
|
||||
"choices": []map[string]any{
|
||||
{"message": map[string]any{"role": "assistant", "content": string(content)}},
|
||||
},
|
||||
}
|
||||
data, err := json.Marshal(resp)
|
||||
require.NoError(t, err)
|
||||
return data
|
||||
}
|
||||
|
||||
func TestLiteLLMParsesValidResult(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
assert.Equal(t, "/v1/chat/completions", r.URL.Path)
|
||||
assert.Equal(t, "application/json", r.Header.Get("Content-Type"))
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
_, _ = w.Write(chatResponseFor(t, validLiteLLMResult()))
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
ex := iexec.NewLiteLLM(srv.URL, "", 5*time.Second)
|
||||
result, err := ex.Run(context.Background(), iexec.Request{
|
||||
SkillPrompt: "review rules",
|
||||
TaskPrompt: "review the code",
|
||||
Model: "ollama/devstral",
|
||||
})
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, "pass", result.Status)
|
||||
assert.Equal(t, "review", result.Skill)
|
||||
}
|
||||
|
||||
func TestLiteLLMSendsAuthHeader(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
assert.Equal(t, "Bearer secret", r.Header.Get("Authorization"))
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
_, _ = w.Write(chatResponseFor(t, validLiteLLMResult()))
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
ex := iexec.NewLiteLLM(srv.URL, "secret", 5*time.Second)
|
||||
_, err := ex.Run(context.Background(), iexec.Request{Model: "x", TaskPrompt: "t", SkillPrompt: "s"})
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
func TestLiteLLMErrorOnNonOKStatus(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusServiceUnavailable)
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
ex := iexec.NewLiteLLM(srv.URL, "", 5*time.Second)
|
||||
_, err := ex.Run(context.Background(), iexec.Request{Model: "x", TaskPrompt: "t"})
|
||||
assert.ErrorContains(t, err, "503")
|
||||
}
|
||||
|
||||
func TestLiteLLMErrorOnUnparsableJSON(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
resp := map[string]any{
|
||||
"choices": []map[string]any{
|
||||
{"message": map[string]any{"role": "assistant", "content": "not json at all"}},
|
||||
},
|
||||
}
|
||||
data, _ := json.Marshal(resp)
|
||||
_, _ = w.Write(data)
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
ex := iexec.NewLiteLLM(srv.URL, "", 5*time.Second)
|
||||
_, err := ex.Run(context.Background(), iexec.Request{Model: "x", TaskPrompt: "t"})
|
||||
assert.Error(t, err)
|
||||
}
|
||||
|
||||
func TestLiteLLMRespectsContextCancellation(t *testing.T) {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
cancel() // Cancel immediately
|
||||
|
||||
ex := iexec.NewLiteLLM("http://invalid.example.com", "", 1*time.Second)
|
||||
_, err := ex.Run(ctx, iexec.Request{Model: "x", TaskPrompt: "t"})
|
||||
assert.Error(t, err)
|
||||
}
|
||||
197
internal/exec/orchestrator.go
Normal file
197
internal/exec/orchestrator.go
Normal file
@@ -0,0 +1,197 @@
|
||||
package exec
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// ChainEntry describes a single tier of an escalation chain.
type ChainEntry struct {
	Model   string // e.g. "ollama/phi4", "claude-sonnet-4-6"
	Tier    string // "local" | "subagent" | "managed"
	IsCloud bool   // true for claude-* models; skips verifier call
}

// EntryFor derives a ChainEntry from a bare model name. Names beginning with
// "claude-" are classified as cloud models in the "subagent" tier; every other
// name is treated as a "local" tier model.
func EntryFor(model string) ChainEntry {
	if strings.HasPrefix(model, "claude-") {
		return ChainEntry{Model: model, Tier: "subagent", IsCloud: true}
	}
	return ChainEntry{Model: model, Tier: "local", IsCloud: false}
}
|
||||
|
||||
// AttemptRecord is the per-tier outcome the orchestrator appends for session
// logging: which model ran, how long generation took, and how the attempt ended.
type AttemptRecord struct {
	Model      string // model name of the chain entry that was tried
	Tier       string // tier label of the chain entry
	DurationMs int64  // generation wall-clock time in milliseconds
	WarmStart  bool   // result of the llama-swap warm probe for this model
	Verdict    string // "accept" | "escalate" | "error"
	Feedback   string // verifier feedback or error text; empty on accept
}
|
||||
|
||||
// VerifierFn is the interface the orchestrator uses to verify local output.
|
||||
type VerifierFn interface {
|
||||
Verify(ctx context.Context, skillPrompt, taskPrompt string, output Result) (Verdict, error)
|
||||
}
|
||||
|
||||
// ExecutorRunFn is the signature of Executor.Run and LiteLLMExecutor.Run.
|
||||
type ExecutorRunFn func(ctx context.Context, req Request) (Result, error)
|
||||
|
||||
// Orchestrator walks an escalation chain, delegating generation and verification.
|
||||
// It implements the ExecutorFn shape expected by skill handlers.
|
||||
type Orchestrator struct {
|
||||
chain []ChainEntry
|
||||
localRun ExecutorRunFn // for local (non-cloud) tiers; may be nil
|
||||
cloudRun ExecutorRunFn // for cloud tiers; may be nil
|
||||
verifier VerifierFn
|
||||
llamaSwapURL string
|
||||
attempts *[]AttemptRecord
|
||||
}
|
||||
|
||||
// NewOrchestrator creates an Orchestrator.
|
||||
// attempts is a pointer to a slice that will be appended to on each tier attempt.
|
||||
// Pass nil for localRun or cloudRun if no tiers of that type exist in the chain.
|
||||
func NewOrchestrator(
|
||||
chain []ChainEntry,
|
||||
localRun ExecutorRunFn,
|
||||
cloudRun ExecutorRunFn,
|
||||
verifier VerifierFn,
|
||||
llamaSwapURL string,
|
||||
attempts *[]AttemptRecord,
|
||||
) *Orchestrator {
|
||||
return &Orchestrator{
|
||||
chain: chain,
|
||||
localRun: localRun,
|
||||
cloudRun: cloudRun,
|
||||
verifier: verifier,
|
||||
llamaSwapURL: llamaSwapURL,
|
||||
attempts: attempts,
|
||||
}
|
||||
}
|
||||
|
||||
// Run walks the escalation chain and returns the first accepted result.
|
||||
// Satisfies the ExecutorFn signature: func(context.Context, Request) (Result, error).
|
||||
func (o *Orchestrator) Run(ctx context.Context, req Request) (Result, error) {
|
||||
taskPrompt := req.TaskPrompt
|
||||
|
||||
for _, entry := range o.chain {
|
||||
warm := o.probeWarm(entry.Model)
|
||||
start := time.Now()
|
||||
|
||||
tierReq := req
|
||||
tierReq.Model = entry.Model
|
||||
tierReq.TaskPrompt = taskPrompt
|
||||
|
||||
if entry.IsCloud {
|
||||
result, genErr := o.cloudRun(ctx, tierReq)
|
||||
dur := time.Since(start).Milliseconds()
|
||||
verdict := "accept"
|
||||
if genErr != nil {
|
||||
verdict = "error"
|
||||
}
|
||||
o.appendAttempt(AttemptRecord{
|
||||
Model: entry.Model,
|
||||
Tier: entry.Tier,
|
||||
DurationMs: dur,
|
||||
WarmStart: warm,
|
||||
Verdict: verdict,
|
||||
})
|
||||
if genErr == nil {
|
||||
return result, nil
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
// Local tier.
|
||||
result, genErr := o.localRun(ctx, tierReq)
|
||||
dur := time.Since(start).Milliseconds()
|
||||
|
||||
if genErr != nil {
|
||||
o.appendAttempt(AttemptRecord{
|
||||
Model: entry.Model,
|
||||
Tier: entry.Tier,
|
||||
DurationMs: dur,
|
||||
WarmStart: warm,
|
||||
Verdict: "error",
|
||||
Feedback: genErr.Error(),
|
||||
})
|
||||
continue
|
||||
}
|
||||
|
||||
verdict, verErr := o.verifier.Verify(ctx, req.SkillPrompt, taskPrompt, result)
|
||||
if verErr != nil {
|
||||
// Treat verifier failure as escalate (safe default).
|
||||
o.appendAttempt(AttemptRecord{
|
||||
Model: entry.Model,
|
||||
Tier: entry.Tier,
|
||||
DurationMs: dur,
|
||||
WarmStart: warm,
|
||||
Verdict: "escalate",
|
||||
Feedback: "verifier error: " + verErr.Error(),
|
||||
})
|
||||
continue
|
||||
}
|
||||
|
||||
if verdict.Accept {
|
||||
o.appendAttempt(AttemptRecord{
|
||||
Model: entry.Model,
|
||||
Tier: entry.Tier,
|
||||
DurationMs: dur,
|
||||
WarmStart: warm,
|
||||
Verdict: "accept",
|
||||
})
|
||||
return result, nil
|
||||
}
|
||||
|
||||
o.appendAttempt(AttemptRecord{
|
||||
Model: entry.Model,
|
||||
Tier: entry.Tier,
|
||||
DurationMs: dur,
|
||||
WarmStart: warm,
|
||||
Verdict: "escalate",
|
||||
Feedback: verdict.Feedback,
|
||||
})
|
||||
// Inject verifier feedback into the next tier's task prompt.
|
||||
taskPrompt = taskPrompt + "\n\nPrior attempt feedback: " + verdict.Feedback
|
||||
}
|
||||
|
||||
return Result{}, fmt.Errorf("all tiers exhausted after %d attempt(s)", len(o.chain))
|
||||
}
|
||||
|
||||
func (o *Orchestrator) appendAttempt(rec AttemptRecord) {
|
||||
if o.attempts != nil {
|
||||
*o.attempts = append(*o.attempts, rec)
|
||||
}
|
||||
}
|
||||
|
||||
// probeWarm checks whether the model is currently loaded in llama-swap.
|
||||
// Returns false on any error or if llamaSwapURL is empty.
|
||||
func (o *Orchestrator) probeWarm(model string) bool {
|
||||
if o.llamaSwapURL == "" {
|
||||
return false
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond)
|
||||
defer cancel()
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, o.llamaSwapURL+"/v1/models", nil)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
resp, err := http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
defer resp.Body.Close() //nolint:errcheck
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
return strings.Contains(string(body), model)
|
||||
}
|
||||
151
internal/exec/orchestrator_test.go
Normal file
151
internal/exec/orchestrator_test.go
Normal file
@@ -0,0 +1,151 @@
|
||||
package exec_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"testing"
|
||||
|
||||
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
// stubRunFn returns preset results sequentially.
|
||||
type stubRunFn struct {
|
||||
calls []stubCall
|
||||
callIdx int
|
||||
}
|
||||
|
||||
type stubCall struct {
|
||||
result iexec.Result
|
||||
err error
|
||||
}
|
||||
|
||||
func (s *stubRunFn) Run(_ context.Context, _ iexec.Request) (iexec.Result, error) {
|
||||
if s.callIdx >= len(s.calls) {
|
||||
return iexec.Result{}, errors.New("unexpected call")
|
||||
}
|
||||
c := s.calls[s.callIdx]
|
||||
s.callIdx++
|
||||
return c.result, c.err
|
||||
}
|
||||
|
||||
// stubVerifier returns preset verdicts sequentially.
|
||||
type stubVerifier struct {
|
||||
verdicts []iexec.Verdict
|
||||
idx int
|
||||
}
|
||||
|
||||
func (s *stubVerifier) Verify(_ context.Context, _, _ string, _ iexec.Result) (iexec.Verdict, error) {
|
||||
if s.idx >= len(s.verdicts) {
|
||||
return iexec.Verdict{}, errors.New("unexpected verify call")
|
||||
}
|
||||
v := s.verdicts[s.idx]
|
||||
s.idx++
|
||||
return v, nil
|
||||
}
|
||||
|
||||
func okResult(skill string) iexec.Result {
|
||||
return iexec.Result{Status: "pass", Phase: "review", Skill: skill, Message: "ok", ModelUsed: "m"}
|
||||
}
|
||||
|
||||
func TestOrchestratorSingleLocalAccept(t *testing.T) {
|
||||
local := &stubRunFn{calls: []stubCall{{result: okResult("review")}}}
|
||||
verifier := &stubVerifier{verdicts: []iexec.Verdict{{Accept: true}}}
|
||||
|
||||
var attempts []iexec.AttemptRecord
|
||||
orch := iexec.NewOrchestrator(
|
||||
[]iexec.ChainEntry{{Model: "ollama/devstral", Tier: "local", IsCloud: false}},
|
||||
local.Run, nil, verifier, "", &attempts,
|
||||
)
|
||||
|
||||
result, err := orch.Run(context.Background(), iexec.Request{TaskPrompt: "review"})
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, "pass", result.Status)
|
||||
require.Len(t, attempts, 1)
|
||||
assert.Equal(t, "local", attempts[0].Tier)
|
||||
assert.Equal(t, "accept", attempts[0].Verdict)
|
||||
}
|
||||
|
||||
func TestOrchestratorEscalatesOnVerifierReject(t *testing.T) {
|
||||
local := &stubRunFn{calls: []stubCall{
|
||||
{result: iexec.Result{Status: "fail", Phase: "review", Skill: "review", Message: "weak"}},
|
||||
{result: okResult("review")},
|
||||
}}
|
||||
verifier := &stubVerifier{verdicts: []iexec.Verdict{
|
||||
{Accept: false, Feedback: "missing line refs"},
|
||||
{Accept: true},
|
||||
}}
|
||||
|
||||
var attempts []iexec.AttemptRecord
|
||||
orch := iexec.NewOrchestrator(
|
||||
[]iexec.ChainEntry{
|
||||
{Model: "ollama/devstral", Tier: "local", IsCloud: false},
|
||||
{Model: "ollama/gemma4", Tier: "local", IsCloud: false},
|
||||
},
|
||||
local.Run, nil, verifier, "", &attempts,
|
||||
)
|
||||
|
||||
result, err := orch.Run(context.Background(), iexec.Request{TaskPrompt: "review"})
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, "pass", result.Status)
|
||||
require.Len(t, attempts, 2)
|
||||
assert.Equal(t, "escalate", attempts[0].Verdict)
|
||||
assert.Equal(t, "missing line refs", attempts[0].Feedback)
|
||||
assert.Equal(t, "accept", attempts[1].Verdict)
|
||||
}
|
||||
|
||||
func TestOrchestratorEscalatesOnLocalError(t *testing.T) {
|
||||
local := &stubRunFn{calls: []stubCall{
|
||||
{err: errors.New("network failure")},
|
||||
{result: okResult("review")},
|
||||
}}
|
||||
verifier := &stubVerifier{verdicts: []iexec.Verdict{{Accept: true}}}
|
||||
|
||||
var attempts []iexec.AttemptRecord
|
||||
orch := iexec.NewOrchestrator(
|
||||
[]iexec.ChainEntry{
|
||||
{Model: "ollama/devstral", Tier: "local", IsCloud: false},
|
||||
{Model: "ollama/gemma4", Tier: "local", IsCloud: false},
|
||||
},
|
||||
local.Run, nil, verifier, "", &attempts,
|
||||
)
|
||||
|
||||
_, err := orch.Run(context.Background(), iexec.Request{TaskPrompt: "review"})
|
||||
require.NoError(t, err)
|
||||
require.Len(t, attempts, 2)
|
||||
assert.Equal(t, "error", attempts[0].Verdict)
|
||||
assert.Equal(t, "accept", attempts[1].Verdict)
|
||||
}
|
||||
|
||||
func TestOrchestratorCloudTierSelfCertifies(t *testing.T) {
|
||||
cloud := &stubRunFn{calls: []stubCall{{result: okResult("review")}}}
|
||||
verifier := &stubVerifier{} // no verdicts — must not be called
|
||||
|
||||
var attempts []iexec.AttemptRecord
|
||||
orch := iexec.NewOrchestrator(
|
||||
[]iexec.ChainEntry{{Model: "claude-sonnet-4-6", Tier: "subagent", IsCloud: true}},
|
||||
nil, cloud.Run, verifier, "", &attempts,
|
||||
)
|
||||
|
||||
result, err := orch.Run(context.Background(), iexec.Request{TaskPrompt: "review"})
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, "pass", result.Status)
|
||||
require.Len(t, attempts, 1)
|
||||
assert.Equal(t, "subagent", attempts[0].Tier)
|
||||
assert.Equal(t, "accept", attempts[0].Verdict)
|
||||
assert.Equal(t, 0, verifier.idx) // verifier never called
|
||||
}
|
||||
|
||||
func TestOrchestratorAllTiersExhausted(t *testing.T) {
|
||||
local := &stubRunFn{calls: []stubCall{{err: errors.New("unavailable")}}}
|
||||
|
||||
var attempts []iexec.AttemptRecord
|
||||
orch := iexec.NewOrchestrator(
|
||||
[]iexec.ChainEntry{{Model: "ollama/devstral", Tier: "local", IsCloud: false}},
|
||||
local.Run, nil, &stubVerifier{}, "", &attempts,
|
||||
)
|
||||
|
||||
_, err := orch.Run(context.Background(), iexec.Request{TaskPrompt: "review"})
|
||||
assert.ErrorContains(t, err, "all tiers exhausted")
|
||||
}
|
||||
@@ -14,9 +14,10 @@ type Result struct {
|
||||
Skill string `json:"skill"` // tdd | review | ...
|
||||
FilePath string `json:"file_path"` // absolute path to generated file
|
||||
RunnerOutput string `json:"runner_output"` // raw stdout+stderr from test runner
|
||||
Verified bool `json:"verified"` // based on exit code, never self-report
|
||||
ModelUsed string `json:"model_used"` // model name or "self"
|
||||
Message string `json:"message"` // one sentence summary
|
||||
Verified bool `json:"verified"` // based on exit code, never self-report
|
||||
ModelUsed string `json:"model_used"` // model name or "self"
|
||||
Message string `json:"message"` // one sentence summary
|
||||
Attempts []AttemptRecord `json:"attempts,omitempty"` // populated by orchestrator, not Claude
|
||||
}
|
||||
|
||||
var validStatuses = map[string]bool{"pass": true, "fail": true, "error": true}
|
||||
|
||||
99
internal/exec/verifier.go
Normal file
99
internal/exec/verifier.go
Normal file
@@ -0,0 +1,99 @@
|
||||
package exec
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Verdict is the decision decoded from a verifier call: whether the output
// was accepted and, if not, why.
type Verdict struct {
	// Accept is true when the output is accepted.
	Accept bool `json:"accept"`
	// Feedback explains a rejection; it is empty when Accept is true.
	Feedback string `json:"feedback"`
}
|
||||
|
||||
// Verifier judges local model output by running a focused Claude call as a
// subprocess.
type Verifier struct {
	claudeBinary string        // executable to invoke
	model        string        // value for --model; flag omitted when empty
	timeout      time.Duration // deadline applied to each Verify call
}
|
||||
|
||||
// NewVerifier creates a Verifier that calls claude with the given binary path and model.
|
||||
// Empty claudeBinary defaults to "claude". Zero timeout defaults to 30s.
|
||||
func NewVerifier(claudeBinary, model string, timeout time.Duration) *Verifier {
|
||||
if claudeBinary == "" {
|
||||
claudeBinary = "claude"
|
||||
}
|
||||
if timeout == 0 {
|
||||
timeout = 30 * time.Second
|
||||
}
|
||||
return &Verifier{
|
||||
claudeBinary: claudeBinary,
|
||||
model: model,
|
||||
timeout: timeout,
|
||||
}
|
||||
}
|
||||
|
||||
// Verify asks Claude whether output satisfies the skill discipline's iron laws.
|
||||
// Returns Verdict{Accept: true} to accept or Verdict{Accept: false, Feedback: "..."}
|
||||
// to escalate. Returns an error on subprocess failure or unparseable response.
|
||||
func (v *Verifier) Verify(ctx context.Context, skillPrompt, taskPrompt string, output Result) (Verdict, error) {
|
||||
ctx, cancel := context.WithTimeout(ctx, v.timeout)
|
||||
defer cancel()
|
||||
|
||||
outputJSON, err := json.Marshal(output)
|
||||
if err != nil {
|
||||
return Verdict{}, fmt.Errorf("verifier: marshal output: %w", err)
|
||||
}
|
||||
|
||||
prompt := fmt.Sprintf(`You are a quality verifier for an AI supervisor system.
|
||||
|
||||
Given the skill discipline, the original task, and the generated output, decide whether the output satisfies the discipline's iron laws and output contract.
|
||||
|
||||
Reply with JSON only — no other text:
|
||||
{"accept": true, "feedback": ""}
|
||||
or
|
||||
{"accept": false, "feedback": "<one sentence reason>"}
|
||||
|
||||
## Skill discipline
|
||||
%s
|
||||
|
||||
## Original task
|
||||
%s
|
||||
|
||||
## Generated output
|
||||
%s`, skillPrompt, taskPrompt, string(outputJSON))
|
||||
|
||||
args := []string{
|
||||
"--print",
|
||||
"--permission-mode", "bypassPermissions",
|
||||
}
|
||||
if v.model != "" {
|
||||
args = append(args, "--model", v.model)
|
||||
}
|
||||
args = append(args, prompt)
|
||||
|
||||
cmd := exec.CommandContext(ctx, v.claudeBinary, args...)
|
||||
cmd.Env = os.Environ()
|
||||
var stdout, stderr bytes.Buffer
|
||||
cmd.Stdout = &stdout
|
||||
cmd.Stderr = &stderr
|
||||
|
||||
if err := cmd.Run(); err != nil {
|
||||
if ctx.Err() != nil {
|
||||
return Verdict{}, fmt.Errorf("verifier: timeout after %s", v.timeout)
|
||||
}
|
||||
return Verdict{}, fmt.Errorf("verifier: claude exited with error: %w — stderr: %s", err, stderr.String())
|
||||
}
|
||||
|
||||
var verdict Verdict
|
||||
if err := json.Unmarshal(bytes.TrimSpace(stdout.Bytes()), &verdict); err != nil {
|
||||
return Verdict{}, fmt.Errorf("verifier: parse verdict JSON: %w — raw: %s", err, stdout.String())
|
||||
}
|
||||
return verdict, nil
|
||||
}
|
||||
74
internal/exec/verifier_test.go
Normal file
74
internal/exec/verifier_test.go
Normal file
@@ -0,0 +1,74 @@
|
||||
package exec_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func fakeVerifierClaude(t *testing.T, verdict iexec.Verdict) string {
|
||||
t.Helper()
|
||||
data, err := json.Marshal(verdict)
|
||||
require.NoError(t, err)
|
||||
dir := t.TempDir()
|
||||
script := filepath.Join(dir, "claude")
|
||||
content := fmt.Sprintf("#!/bin/sh\necho '%s'\n", string(data))
|
||||
require.NoError(t, os.WriteFile(script, []byte(content), 0755))
|
||||
return script
|
||||
}
|
||||
|
||||
func TestVerifierAccepts(t *testing.T) {
|
||||
claude := fakeVerifierClaude(t, iexec.Verdict{Accept: true, Feedback: ""})
|
||||
v := iexec.NewVerifier(claude, "claude-sonnet-4-6", 5*time.Second)
|
||||
|
||||
verdict, err := v.Verify(context.Background(), "skill rules", "do the task", iexec.Result{
|
||||
Status: "pass", Phase: "review", Skill: "review", Message: "ok",
|
||||
})
|
||||
require.NoError(t, err)
|
||||
assert.True(t, verdict.Accept)
|
||||
assert.Empty(t, verdict.Feedback)
|
||||
}
|
||||
|
||||
func TestVerifierEscalates(t *testing.T) {
|
||||
claude := fakeVerifierClaude(t, iexec.Verdict{Accept: false, Feedback: "missing line references"})
|
||||
v := iexec.NewVerifier(claude, "claude-sonnet-4-6", 5*time.Second)
|
||||
|
||||
verdict, err := v.Verify(context.Background(), "skill rules", "do the task", iexec.Result{
|
||||
Status: "pass", Phase: "review", Skill: "review", Message: "incomplete",
|
||||
})
|
||||
require.NoError(t, err)
|
||||
assert.False(t, verdict.Accept)
|
||||
assert.Equal(t, "missing line references", verdict.Feedback)
|
||||
}
|
||||
|
||||
func TestVerifierErrorOnUnparsableOutput(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
script := filepath.Join(dir, "claude")
|
||||
require.NoError(t, os.WriteFile(script, []byte("#!/bin/sh\necho 'not json'\n"), 0755))
|
||||
|
||||
v := iexec.NewVerifier(script, "claude-sonnet-4-6", 5*time.Second)
|
||||
_, err := v.Verify(context.Background(), "rules", "task", iexec.Result{
|
||||
Status: "pass", Phase: "review", Skill: "review", Message: "ok",
|
||||
})
|
||||
assert.Error(t, err)
|
||||
}
|
||||
|
||||
func TestVerifierErrorOnNonZeroExit(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
script := filepath.Join(dir, "claude")
|
||||
require.NoError(t, os.WriteFile(script, []byte("#!/bin/sh\nexit 1\n"), 0755))
|
||||
|
||||
v := iexec.NewVerifier(script, "claude-sonnet-4-6", 5*time.Second)
|
||||
_, err := v.Verify(context.Background(), "rules", "task", iexec.Result{
|
||||
Status: "pass", Phase: "review", Skill: "review", Message: "ok",
|
||||
})
|
||||
assert.Error(t, err)
|
||||
}
|
||||
26
internal/session/attempts.go
Normal file
26
internal/session/attempts.go
Normal file
@@ -0,0 +1,26 @@
|
||||
// internal/session/attempts.go
|
||||
package session
|
||||
|
||||
import iexec "github.com/mathiasbq/supervisor/internal/exec"
|
||||
|
||||
// AttemptsFrom converts exec.AttemptRecord slice to session.Attempt slice
|
||||
// for writing into a session JSONL entry.
|
||||
func AttemptsFrom(records []iexec.AttemptRecord) []Attempt {
|
||||
if len(records) == 0 {
|
||||
return nil
|
||||
}
|
||||
out := make([]Attempt, len(records))
|
||||
for i, r := range records {
|
||||
out[i] = Attempt{
|
||||
Attempt: i + 1,
|
||||
Model: r.Model,
|
||||
Tier: r.Tier,
|
||||
DurationMs: r.DurationMs,
|
||||
WarmStart: r.WarmStart,
|
||||
Verdict: r.Verdict,
|
||||
Feedback: r.Feedback,
|
||||
Verified: r.Verdict == "accept",
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
37
internal/session/attempts_test.go
Normal file
37
internal/session/attempts_test.go
Normal file
@@ -0,0 +1,37 @@
|
||||
package session_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/mathiasbq/supervisor/internal/exec"
|
||||
"github.com/mathiasbq/supervisor/internal/session"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestAttemptsFromEmpty(t *testing.T) {
|
||||
result := session.AttemptsFrom(nil)
|
||||
assert.Empty(t, result)
|
||||
}
|
||||
|
||||
func TestAttemptsFromSetsIndex(t *testing.T) {
|
||||
records := []exec.AttemptRecord{
|
||||
{Model: "ollama/phi4", Tier: "local", DurationMs: 1200, WarmStart: true, Verdict: "escalate", Feedback: "too vague"},
|
||||
{Model: "claude-sonnet-4-6", Tier: "subagent", DurationMs: 3400, WarmStart: false, Verdict: "accept"},
|
||||
}
|
||||
result := session.AttemptsFrom(records)
|
||||
require.Len(t, result, 2)
|
||||
|
||||
assert.Equal(t, 1, result[0].Attempt)
|
||||
assert.Equal(t, "ollama/phi4", result[0].Model)
|
||||
assert.Equal(t, "local", result[0].Tier)
|
||||
assert.Equal(t, int64(1200), result[0].DurationMs)
|
||||
assert.True(t, result[0].WarmStart)
|
||||
assert.Equal(t, "escalate", result[0].Verdict)
|
||||
assert.Equal(t, "too vague", result[0].Feedback)
|
||||
assert.False(t, result[0].Verified)
|
||||
|
||||
assert.Equal(t, 2, result[1].Attempt)
|
||||
assert.Equal(t, "claude-sonnet-4-6", result[1].Model)
|
||||
assert.True(t, result[1].Verified)
|
||||
}
|
||||
@@ -36,3 +36,21 @@ func FormatHistory(entries []Entry, excludePhase string) string {
|
||||
}
|
||||
return b.String()
|
||||
}
|
||||
|
||||
// PrependHistory reads the session log for sessionID and prepends a formatted
|
||||
// history block to task. Returns task unchanged if sessionID or sessionsDir is
|
||||
// empty, or if no prior entries exist.
|
||||
func PrependHistory(sessionsDir, sessionID, currentPhase, task string) string {
|
||||
if sessionID == "" || sessionsDir == "" {
|
||||
return task
|
||||
}
|
||||
entries, err := Read(sessionsDir, sessionID)
|
||||
if err != nil || len(entries) == 0 {
|
||||
return task
|
||||
}
|
||||
history := FormatHistory(entries, currentPhase)
|
||||
if history == "" {
|
||||
return task
|
||||
}
|
||||
return history + "\n---\n\n" + task
|
||||
}
|
||||
|
||||
@@ -2,11 +2,13 @@
|
||||
package session_test
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/mathiasbq/supervisor/internal/session"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestFormatHistoryEmpty(t *testing.T) {
|
||||
@@ -39,3 +41,45 @@ func TestFormatHistoryExcludesCurrentPhase(t *testing.T) {
|
||||
assert.Contains(t, result, "red done")
|
||||
assert.NotContains(t, result, "green done")
|
||||
}
|
||||
|
||||
func TestPrependHistoryNoSessionID(t *testing.T) {
|
||||
result := session.PrependHistory("", "", "review", "do the task")
|
||||
assert.Equal(t, "do the task", result)
|
||||
}
|
||||
|
||||
func TestPrependHistoryNoLog(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
result := session.PrependHistory(dir, "sess-abc", "review", "do the task")
|
||||
assert.Equal(t, "do the task", result)
|
||||
}
|
||||
|
||||
func TestPrependHistoryPrependsHistory(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
entry := session.Entry{
|
||||
SessionID: "sess-abc", Skill: "tdd", Phase: "red",
|
||||
FinalStatus: "pass", Message: "wrote test",
|
||||
Timestamp: time.Now(),
|
||||
}
|
||||
require.NoError(t, session.Append(dir, "sess-abc", entry))
|
||||
|
||||
result := session.PrependHistory(dir, "sess-abc", "review", "do the task")
|
||||
assert.Contains(t, result, "## Session history")
|
||||
assert.Contains(t, result, "wrote test")
|
||||
assert.True(t, strings.HasSuffix(result, "do the task"))
|
||||
}
|
||||
|
||||
func TestPrependHistoryExcludesCurrentPhase(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
require.NoError(t, session.Append(dir, "sess-abc", session.Entry{
|
||||
SessionID: "sess-abc", Skill: "tdd", Phase: "red",
|
||||
FinalStatus: "pass", Message: "red done", Timestamp: time.Now(),
|
||||
}))
|
||||
require.NoError(t, session.Append(dir, "sess-abc", session.Entry{
|
||||
SessionID: "sess-abc", Skill: "tdd", Phase: "green",
|
||||
FinalStatus: "pass", Message: "green done", Timestamp: time.Now(),
|
||||
}))
|
||||
|
||||
result := session.PrependHistory(dir, "sess-abc", "green", "do the task")
|
||||
assert.Contains(t, result, "red done")
|
||||
assert.NotContains(t, result, "green done")
|
||||
}
|
||||
|
||||
@@ -32,9 +32,14 @@ type Entry struct {
|
||||
type Attempt struct {
|
||||
Attempt int `json:"attempt"`
|
||||
Model string `json:"model"`
|
||||
Tier string `json:"tier"` // local | subagent | managed
|
||||
DurationMs int64 `json:"duration_ms"`
|
||||
WarmStart bool `json:"warm_start"` // model already loaded in llama-swap
|
||||
Verified bool `json:"verified"`
|
||||
Verdict string `json:"verdict,omitempty"` // accept | escalate | error
|
||||
Feedback string `json:"feedback,omitempty"` // verifier feedback on escalation
|
||||
OutputSummary string `json:"output_summary,omitempty"`
|
||||
RunnerOutput string `json:"runner_output,omitempty"`
|
||||
Verified bool `json:"verified"`
|
||||
}
|
||||
|
||||
// Append writes entry as a single JSON line to sessionsDir/{sessionID}.jsonl.
|
||||
|
||||
@@ -61,3 +61,22 @@ func TestRead_EmptyWhenNoFile(t *testing.T) {
|
||||
require.NoError(t, err)
|
||||
assert.Empty(t, entries)
|
||||
}
|
||||
|
||||
func TestAttemptRoundTrip(t *testing.T) {
|
||||
a := session.Attempt{
|
||||
Attempt: 1,
|
||||
Model: "ollama/devstral",
|
||||
Tier: "local",
|
||||
DurationMs: 4200,
|
||||
WarmStart: true,
|
||||
Verified: false,
|
||||
Verdict: "escalate",
|
||||
Feedback: "missing line references",
|
||||
}
|
||||
data, err := json.Marshal(a)
|
||||
require.NoError(t, err)
|
||||
|
||||
var got session.Attempt
|
||||
require.NoError(t, json.Unmarshal(data, &got))
|
||||
assert.Equal(t, a, got)
|
||||
}
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
||||
"github.com/mathiasbq/supervisor/internal/session"
|
||||
@@ -43,11 +44,12 @@ func (s *Skill) Handle(ctx context.Context, tool string, args json.RawMessage) (
|
||||
"phase: debug\nproject_root: %s\nerror: %s\ncontext: %s\nmodel: %s",
|
||||
a.ProjectRoot, a.Error, a.Context, model,
|
||||
)
|
||||
task = s.prependHistory(a.SessionID, "debug", task)
|
||||
task = session.PrependHistory(s.cfg.SessionsDir, a.SessionID, "debug", task)
|
||||
|
||||
if s.cfg.ExecutorFn == nil {
|
||||
return nil, fmt.Errorf("no executor configured")
|
||||
}
|
||||
t0 := time.Now()
|
||||
result, err := s.cfg.ExecutorFn(ctx, iexec.Request{
|
||||
SkillPrompt: s.cfg.SkillPrompt,
|
||||
TaskPrompt: task,
|
||||
@@ -57,24 +59,25 @@ func (s *Skill) Handle(ctx context.Context, tool string, args json.RawMessage) (
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if a.SessionID != "" && s.cfg.SessionsDir != "" {
|
||||
_ = session.Append(s.cfg.SessionsDir, a.SessionID, session.Entry{
|
||||
SessionID: a.SessionID,
|
||||
Timestamp: time.Now(),
|
||||
Skill: "debug",
|
||||
Phase: "debug",
|
||||
ProjectRoot: a.ProjectRoot,
|
||||
Attempts: session.AttemptsFrom(result.Attempts),
|
||||
FinalStatus: result.Status,
|
||||
ModelUsed: result.ModelUsed,
|
||||
DurationMs: time.Since(t0).Milliseconds(),
|
||||
Message: result.Message,
|
||||
})
|
||||
}
|
||||
|
||||
b, err := json.Marshal(result)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("marshal result: %w", err)
|
||||
}
|
||||
return b, nil
|
||||
}
|
||||
|
||||
func (s *Skill) prependHistory(sessionID, currentPhase, task string) string {
|
||||
if sessionID == "" || s.cfg.SessionsDir == "" {
|
||||
return task
|
||||
}
|
||||
entries, err := session.Read(s.cfg.SessionsDir, sessionID)
|
||||
if err != nil || len(entries) == 0 {
|
||||
return task
|
||||
}
|
||||
history := session.FormatHistory(entries, currentPhase)
|
||||
if history == "" {
|
||||
return task
|
||||
}
|
||||
return history + "\n---\n\n" + task
|
||||
}
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
||||
"github.com/mathiasbq/supervisor/internal/session"
|
||||
@@ -52,6 +53,7 @@ func (s *Skill) Handle(ctx context.Context, tool string, args json.RawMessage) (
|
||||
if s.cfg.ExecutorFn == nil {
|
||||
return nil, fmt.Errorf("no executor configured")
|
||||
}
|
||||
t0 := time.Now()
|
||||
result, err := s.cfg.ExecutorFn(ctx, iexec.Request{
|
||||
SkillPrompt: s.cfg.SkillPrompt,
|
||||
TaskPrompt: taskPrompt,
|
||||
@@ -62,6 +64,18 @@ func (s *Skill) Handle(ctx context.Context, tool string, args json.RawMessage) (
|
||||
return nil, fmt.Errorf("retrospective worker: %w", err)
|
||||
}
|
||||
|
||||
_ = session.Append(s.cfg.SessionsDir, a.SessionID, session.Entry{
|
||||
SessionID: a.SessionID,
|
||||
Timestamp: time.Now(),
|
||||
Skill: "retrospective",
|
||||
Phase: "retrospective",
|
||||
Attempts: session.AttemptsFrom(result.Attempts),
|
||||
FinalStatus: result.Status,
|
||||
ModelUsed: result.ModelUsed,
|
||||
DurationMs: time.Since(t0).Milliseconds(),
|
||||
Message: result.Message,
|
||||
})
|
||||
|
||||
b, err := json.Marshal(result)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("marshal result: %w", err)
|
||||
|
||||
@@ -6,6 +6,7 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
||||
"github.com/mathiasbq/supervisor/internal/session"
|
||||
@@ -44,11 +45,12 @@ func (s *Skill) Handle(ctx context.Context, tool string, args json.RawMessage) (
|
||||
"phase: review\nproject_root: %s\nfiles: %s\ncontext: %s\nmodel: %s",
|
||||
a.ProjectRoot, strings.Join(a.Files, ", "), a.Context, model,
|
||||
)
|
||||
task = s.prependHistory(a.SessionID, "review", task)
|
||||
task = session.PrependHistory(s.cfg.SessionsDir, a.SessionID, "review", task)
|
||||
|
||||
if s.cfg.ExecutorFn == nil {
|
||||
return nil, fmt.Errorf("no executor configured")
|
||||
}
|
||||
t0 := time.Now()
|
||||
result, err := s.cfg.ExecutorFn(ctx, iexec.Request{
|
||||
SkillPrompt: s.cfg.SkillPrompt,
|
||||
TaskPrompt: task,
|
||||
@@ -58,24 +60,26 @@ func (s *Skill) Handle(ctx context.Context, tool string, args json.RawMessage) (
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if a.SessionID != "" && s.cfg.SessionsDir != "" {
|
||||
_ = session.Append(s.cfg.SessionsDir, a.SessionID, session.Entry{
|
||||
SessionID: a.SessionID,
|
||||
Timestamp: time.Now(),
|
||||
Skill: "review",
|
||||
Phase: "review",
|
||||
ProjectRoot: a.ProjectRoot,
|
||||
Attempts: session.AttemptsFrom(result.Attempts),
|
||||
FinalStatus: result.Status,
|
||||
FilePath: result.FilePath,
|
||||
ModelUsed: result.ModelUsed,
|
||||
DurationMs: time.Since(t0).Milliseconds(),
|
||||
Message: result.Message,
|
||||
})
|
||||
}
|
||||
|
||||
b, err := json.Marshal(result)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("marshal result: %w", err)
|
||||
}
|
||||
return b, nil
|
||||
}
|
||||
|
||||
func (s *Skill) prependHistory(sessionID, currentPhase, task string) string {
|
||||
if sessionID == "" || s.cfg.SessionsDir == "" {
|
||||
return task
|
||||
}
|
||||
entries, err := session.Read(s.cfg.SessionsDir, sessionID)
|
||||
if err != nil || len(entries) == 0 {
|
||||
return task
|
||||
}
|
||||
history := session.FormatHistory(entries, currentPhase)
|
||||
if history == "" {
|
||||
return task
|
||||
}
|
||||
return history + "\n---\n\n" + task
|
||||
}
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
||||
"github.com/mathiasbq/supervisor/internal/session"
|
||||
@@ -48,11 +49,12 @@ func (s *Skill) Handle(ctx context.Context, tool string, args json.RawMessage) (
|
||||
"phase: spec\nproject_root: %s\nrequirements: %s\noutput_path: %s\ncontext: %s\nmodel: %s",
|
||||
a.ProjectRoot, a.Requirements, outputPath, a.Context, model,
|
||||
)
|
||||
task = s.prependHistory(a.SessionID, "spec", task)
|
||||
task = session.PrependHistory(s.cfg.SessionsDir, a.SessionID, "spec", task)
|
||||
|
||||
if s.cfg.ExecutorFn == nil {
|
||||
return nil, fmt.Errorf("no executor configured")
|
||||
}
|
||||
t0 := time.Now()
|
||||
result, err := s.cfg.ExecutorFn(ctx, iexec.Request{
|
||||
SkillPrompt: s.cfg.SkillPrompt,
|
||||
TaskPrompt: task,
|
||||
@@ -62,24 +64,26 @@ func (s *Skill) Handle(ctx context.Context, tool string, args json.RawMessage) (
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if a.SessionID != "" && s.cfg.SessionsDir != "" {
|
||||
_ = session.Append(s.cfg.SessionsDir, a.SessionID, session.Entry{
|
||||
SessionID: a.SessionID,
|
||||
Timestamp: time.Now(),
|
||||
Skill: "spec",
|
||||
Phase: "spec",
|
||||
ProjectRoot: a.ProjectRoot,
|
||||
Attempts: session.AttemptsFrom(result.Attempts),
|
||||
FinalStatus: result.Status,
|
||||
FilePath: result.FilePath,
|
||||
ModelUsed: result.ModelUsed,
|
||||
DurationMs: time.Since(t0).Milliseconds(),
|
||||
Message: result.Message,
|
||||
})
|
||||
}
|
||||
|
||||
b, err := json.Marshal(result)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("marshal result: %w", err)
|
||||
}
|
||||
return b, nil
|
||||
}
|
||||
|
||||
func (s *Skill) prependHistory(sessionID, currentPhase, task string) string {
|
||||
if sessionID == "" || s.cfg.SessionsDir == "" {
|
||||
return task
|
||||
}
|
||||
entries, err := session.Read(s.cfg.SessionsDir, sessionID)
|
||||
if err != nil || len(entries) == 0 {
|
||||
return task
|
||||
}
|
||||
history := session.FormatHistory(entries, currentPhase)
|
||||
if history == "" {
|
||||
return task
|
||||
}
|
||||
return history + "\n---\n\n" + task
|
||||
}
|
||||
|
||||
@@ -4,6 +4,7 @@ import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
||||
"github.com/mathiasbq/supervisor/internal/session"
|
||||
@@ -70,8 +71,15 @@ func (s *Skill) handleGreen(ctx context.Context, raw json.RawMessage) (json.RawM
|
||||
"phase: green\nproject_root: %s\ntest_path: %s\nmodel: %s\ntest_cmd: %s",
|
||||
args.ProjectRoot, args.TestPath, s.resolveModel(args.Model), args.TestCmd,
|
||||
)
|
||||
task = s.prependHistory(args.SessionID, "green", task)
|
||||
return s.execute(ctx, task)
|
||||
task = session.PrependHistory(s.cfg.SessionsDir, args.SessionID, "green", task)
|
||||
|
||||
t0 := time.Now()
|
||||
result, err := s.execute(ctx, task)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
s.logAttempt(args.SessionID, args.ProjectRoot, "tdd", "green", t0, result)
|
||||
return result, nil
|
||||
}
|
||||
|
||||
type refactorArgs struct {
|
||||
@@ -101,23 +109,15 @@ func (s *Skill) handleRefactor(ctx context.Context, raw json.RawMessage) (json.R
|
||||
"phase: refactor\nproject_root: %s\ntest_path: %s\nimpl_path: %s\nmodel: %s\ntest_cmd: %s",
|
||||
args.ProjectRoot, args.TestPath, args.ImplPath, s.resolveModel(args.Model), args.TestCmd,
|
||||
)
|
||||
task = s.prependHistory(args.SessionID, "refactor", task)
|
||||
return s.execute(ctx, task)
|
||||
}
|
||||
task = session.PrependHistory(s.cfg.SessionsDir, args.SessionID, "refactor", task)
|
||||
|
||||
func (s *Skill) prependHistory(sessionID, currentPhase, task string) string {
|
||||
if sessionID == "" || s.cfg.SessionsDir == "" {
|
||||
return task
|
||||
t0 := time.Now()
|
||||
result, err := s.execute(ctx, task)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
entries, err := session.Read(s.cfg.SessionsDir, sessionID)
|
||||
if err != nil || len(entries) == 0 {
|
||||
return task
|
||||
}
|
||||
history := session.FormatHistory(entries, currentPhase)
|
||||
if history == "" {
|
||||
return task
|
||||
}
|
||||
return history + "\n---\n\n" + task
|
||||
s.logAttempt(args.SessionID, args.ProjectRoot, "tdd", "refactor", t0, result)
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func (s *Skill) resolveModel(override string) string {
|
||||
@@ -127,6 +127,7 @@ func (s *Skill) resolveModel(override string) string {
|
||||
return s.cfg.DefaultModel
|
||||
}
|
||||
|
||||
// execute calls ExecutorFn and returns the marshaled result.
|
||||
func (s *Skill) execute(ctx context.Context, task string) (json.RawMessage, error) {
|
||||
if s.cfg.ExecutorFn == nil {
|
||||
return nil, fmt.Errorf("no executor configured")
|
||||
@@ -141,3 +142,28 @@ func (s *Skill) execute(ctx context.Context, task string) (json.RawMessage, erro
|
||||
}
|
||||
return json.Marshal(result)
|
||||
}
|
||||
|
||||
// logAttempt writes a session.Entry for a completed phase if session_id is set.
|
||||
// raw is the marshaled Result returned by execute; we unmarshal to extract fields.
|
||||
func (s *Skill) logAttempt(sessionID, projectRoot, skill, phase string, t0 time.Time, raw json.RawMessage) {
|
||||
if sessionID == "" || s.cfg.SessionsDir == "" {
|
||||
return
|
||||
}
|
||||
var result iexec.Result
|
||||
if err := json.Unmarshal(raw, &result); err != nil {
|
||||
return
|
||||
}
|
||||
_ = session.Append(s.cfg.SessionsDir, sessionID, session.Entry{
|
||||
SessionID: sessionID,
|
||||
Timestamp: time.Now(),
|
||||
Skill: skill,
|
||||
Phase: phase,
|
||||
ProjectRoot: projectRoot,
|
||||
Attempts: session.AttemptsFrom(result.Attempts),
|
||||
FinalStatus: result.Status,
|
||||
FilePath: result.FilePath,
|
||||
ModelUsed: result.ModelUsed,
|
||||
DurationMs: time.Since(t0).Milliseconds(),
|
||||
Message: result.Message,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
||||
"github.com/mathiasbq/supervisor/internal/session"
|
||||
@@ -58,6 +59,7 @@ func (s *Skill) Handle(ctx context.Context, tool string, args json.RawMessage) (
|
||||
}
|
||||
|
||||
// ── Step 2: Writer agent (receives reader candidates) ────────────────────
|
||||
t0 := time.Now()
|
||||
writerTask := fmt.Sprintf(
|
||||
"role: writer\nsession_id: %s\nbrain_dir: %s\n\nreader_summary: %s\nreader_candidates:\n%s",
|
||||
a.SessionID, s.cfg.BrainDir, readerResult.Message, readerResult.RunnerOutput,
|
||||
@@ -72,6 +74,18 @@ func (s *Skill) Handle(ctx context.Context, tool string, args json.RawMessage) (
|
||||
return nil, fmt.Errorf("writer agent: %w", err)
|
||||
}
|
||||
|
||||
_ = session.Append(s.cfg.SessionsDir, a.SessionID, session.Entry{
|
||||
SessionID: a.SessionID,
|
||||
Timestamp: time.Now(),
|
||||
Skill: "trainer",
|
||||
Phase: "trainer",
|
||||
Attempts: session.AttemptsFrom(writerResult.Attempts),
|
||||
FinalStatus: writerResult.Status,
|
||||
ModelUsed: writerResult.ModelUsed,
|
||||
DurationMs: time.Since(t0).Milliseconds(),
|
||||
Message: writerResult.Message,
|
||||
})
|
||||
|
||||
b, err := json.Marshal(writerResult)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("marshal result: %w", err)
|
||||
|
||||
Reference in New Issue
Block a user