100 lines
2.7 KiB
Go
100 lines
2.7 KiB
Go
package exec
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"os"
|
|
"os/exec"
|
|
"time"
|
|
)
|
|
|
|
// Verdict is the decision produced by a Claude verification call.
//
// It maps onto a JSON object with the keys "accept" and "feedback".
type Verdict struct {
	// Accept is true when the verifier accepted the output.
	Accept bool `json:"accept"`

	// Feedback carries the verifier's reason; it is empty when Accept is true.
	Feedback string `json:"feedback"`
}
|
|
|
|
// Verifier runs a focused Claude call to judge local model output.
//
// Build one with NewVerifier so the binary and timeout defaults are applied.
type Verifier struct {
	// claudeBinary is the executable name or path that Verify runs.
	claudeBinary string

	// model is forwarded to the binary as "--model <model>" when non-empty.
	model string

	// timeout bounds each Verify call.
	timeout time.Duration
}

// NewVerifier creates a Verifier that calls claude with the given binary path and model.
//
// An empty claudeBinary defaults to "claude". A zero or negative timeout
// defaults to 30s: Verify derives its context deadline from this value, so a
// non-positive duration would expire the context before the subprocess could
// do any work. model is stored as given; an empty model means no "--model"
// flag is passed.
func NewVerifier(claudeBinary, model string, timeout time.Duration) *Verifier {
	if claudeBinary == "" {
		claudeBinary = "claude"
	}
	if timeout <= 0 {
		timeout = 30 * time.Second
	}
	return &Verifier{
		claudeBinary: claudeBinary,
		model:        model,
		timeout:      timeout,
	}
}
|
|
|
|
// Verify asks Claude whether output satisfies the skill discipline's iron laws.
|
|
// Returns Verdict{Accept: true} to accept or Verdict{Accept: false, Feedback: "..."}
|
|
// to escalate. Returns an error on subprocess failure or unparseable response.
|
|
func (v *Verifier) Verify(ctx context.Context, skillPrompt, taskPrompt string, output Result) (Verdict, error) {
|
|
ctx, cancel := context.WithTimeout(ctx, v.timeout)
|
|
defer cancel()
|
|
|
|
outputJSON, err := json.Marshal(output)
|
|
if err != nil {
|
|
return Verdict{}, fmt.Errorf("verifier: marshal output: %w", err)
|
|
}
|
|
|
|
prompt := fmt.Sprintf(`You are a quality verifier for an AI supervisor system.
|
|
|
|
Given the skill discipline, the original task, and the generated output, decide whether the output satisfies the discipline's iron laws and output contract.
|
|
|
|
Reply with JSON only — no other text:
|
|
{"accept": true, "feedback": ""}
|
|
or
|
|
{"accept": false, "feedback": "<one sentence reason>"}
|
|
|
|
## Skill discipline
|
|
%s
|
|
|
|
## Original task
|
|
%s
|
|
|
|
## Generated output
|
|
%s`, skillPrompt, taskPrompt, string(outputJSON))
|
|
|
|
args := []string{
|
|
"--print",
|
|
"--permission-mode", "bypassPermissions",
|
|
}
|
|
if v.model != "" {
|
|
args = append(args, "--model", v.model)
|
|
}
|
|
args = append(args, prompt)
|
|
|
|
cmd := exec.CommandContext(ctx, v.claudeBinary, args...)
|
|
cmd.Env = os.Environ()
|
|
var stdout, stderr bytes.Buffer
|
|
cmd.Stdout = &stdout
|
|
cmd.Stderr = &stderr
|
|
|
|
if err := cmd.Run(); err != nil {
|
|
if ctx.Err() != nil {
|
|
return Verdict{}, fmt.Errorf("verifier: timeout after %s", v.timeout)
|
|
}
|
|
return Verdict{}, fmt.Errorf("verifier: claude exited with error: %w — stderr: %s", err, stderr.String())
|
|
}
|
|
|
|
var verdict Verdict
|
|
if err := json.Unmarshal(bytes.TrimSpace(stdout.Bytes()), &verdict); err != nil {
|
|
return Verdict{}, fmt.Errorf("verifier: parse verdict JSON: %w — raw: %s", err, stdout.String())
|
|
}
|
|
return verdict, nil
|
|
}
|