Files
hyperguild/internal/exec/litellm.go
Mathias Bergqvist ca8a691241
All checks were successful
cd / Build and deploy (push) Successful in 6s
CI / Lint / Test / Vet (push) Successful in 10s
CI / Mirror to GitHub (push) Successful in 3s
fix(exec): strip trailing result-schema JSON from local model output
Small models (phi4-mini) produce correct markdown analysis but then
append the old {status/phase/skill} JSON schema out of training habit.
stripResultJSON() detects and removes these trailing fences so Claude
Code receives clean prose regardless of model behaviour.

Non-schema json blocks (config examples etc) are preserved.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-22 16:55:53 +02:00

128 lines
3.5 KiB
Go

package exec
import (
"bytes"
"context"
"encoding/json"
"fmt"
"net/http"
"strings"
"time"
)
// LiteLLMExecutor is a client for a LiteLLM-compatible server. It posts chat
// requests to the server's /v1/chat/completions endpoint and returns the
// assistant message text.
type LiteLLMExecutor struct {
	// baseURL is prepended to "/v1/chat/completions"; apiKey, when non-empty,
	// is sent as a Bearer token in the Authorization header.
	baseURL, apiKey string
	// httpClient performs every request issued through this executor.
	httpClient *http.Client
}
// NewLiteLLM creates a LiteLLMExecutor that talks to the server at baseURL.
//
// apiKey is stored as given; an empty key is allowed.
//
// timeout applies to the full HTTP round-trip per call. A zero or negative
// timeout selects the 120-second default: http.Client only enforces a
// positive Timeout, so passing a non-positive value through would silently
// remove the per-call limit.
func NewLiteLLM(baseURL, apiKey string, timeout time.Duration) *LiteLLMExecutor {
	const defaultTimeout = 120 * time.Second

	if timeout <= 0 {
		timeout = defaultTimeout
	}
	return &LiteLLMExecutor{
		baseURL:    baseURL,
		apiKey:     apiKey,
		httpClient: &http.Client{Timeout: timeout},
	}
}
// JSON wire types for the chat completions exchange. Only the fields this
// package sends or reads are declared.
type (
	// litellmMessage is one chat message: the role of its author and the
	// message text.
	litellmMessage struct {
		Role    string `json:"role"`
		Content string `json:"content"`
	}

	// litellmRequest is the request payload: the target model and the ordered
	// list of messages.
	litellmRequest struct {
		Model    string           `json:"model"`
		Messages []litellmMessage `json:"messages"`
	}

	// litellmChoice is a single entry of the response's "choices" array.
	litellmChoice struct {
		Message litellmMessage `json:"message"`
	}

	// litellmResponse is the part of the response body that gets decoded.
	litellmResponse struct {
		Choices []litellmChoice `json:"choices"`
	}
)
// Complete sends system+user messages to the given model and returns the
// assistant text along with the round-trip duration in milliseconds.
//
// The returned text is the content of the first choice after stripResultJSON
// has been applied to it. The duration runs from just before the request is
// sent until the response body has been decoded.
//
// On any failure (request marshalling or construction, transport error,
// non-200 status, undecodable body, or an empty choices list) it returns
// "", 0 and an error prefixed with "litellm:".
func (e *LiteLLMExecutor) Complete(ctx context.Context, model, system, user string) (string, int64, error) {
	body := litellmRequest{
		Model: model,
		Messages: []litellmMessage{
			{Role: "system", Content: system},
			{Role: "user", Content: user},
		},
	}
	bodyBytes, err := json.Marshal(body)
	if err != nil {
		return "", 0, fmt.Errorf("litellm: marshal request: %w", err)
	}

	// Tolerate a base URL configured with a trailing slash; plain
	// concatenation would otherwise request "//v1/chat/completions".
	endpoint := strings.TrimRight(e.baseURL, "/") + "/v1/chat/completions"
	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewReader(bodyBytes))
	if err != nil {
		return "", 0, fmt.Errorf("litellm: create request: %w", err)
	}
	httpReq.Header.Set("Content-Type", "application/json")
	if e.apiKey != "" {
		httpReq.Header.Set("Authorization", "Bearer "+e.apiKey)
	}

	t0 := time.Now()
	resp, err := e.httpClient.Do(httpReq)
	if err != nil {
		return "", 0, fmt.Errorf("litellm: request failed: %w", err)
	}
	defer resp.Body.Close() //nolint:errcheck

	if resp.StatusCode != http.StatusOK {
		return "", 0, fmt.Errorf("litellm: server returned status %d", resp.StatusCode)
	}

	var chatResp litellmResponse
	if err := json.NewDecoder(resp.Body).Decode(&chatResp); err != nil {
		return "", 0, fmt.Errorf("litellm: decode response: %w", err)
	}
	// Do returns before the body has been consumed, so the clock is stopped
	// only after decoding to make the figure cover the whole round-trip.
	durationMs := time.Since(t0).Milliseconds()

	if len(chatResp.Choices) == 0 {
		return "", 0, fmt.Errorf("litellm: no choices in response")
	}
	return stripResultJSON(chatResp.Choices[0].Message.Content), durationMs, nil
}
// stripResultJSON removes a trailing ```json block that matches the old
// structured result schema (containing "status" and "phase" keys). Some local
// models produce correct markdown prose but then append the old JSON format
// out of habit.
//
// The block is removed only when all of the following hold:
//   - text, ignoring trailing spaces, tabs and newlines, ends with a closing
//     ``` fence;
//   - the nearest preceding fence is a ```json opener, so the closing fence
//     really belongs to a json block and not to some later code block;
//   - the block contains both "status" and "phase".
//
// In that case everything before the block is returned with trailing
// whitespace trimmed. In every other case text is returned unchanged, so
// non-schema json blocks (config examples etc.) are preserved.
func stripResultJSON(text string) string {
	const (
		fenceMark  = "```"
		jsonFence  = "```json"
		whitespace = " \t\r\n"
	)

	trimmed := strings.TrimRight(text, whitespace)
	if !strings.HasSuffix(trimmed, fenceMark) {
		return text
	}

	// Look for the opener only in front of the closing fence, and take the
	// nearest fence of any kind: pairing the closing fence with an earlier
	// ```json opener would swallow unrelated prose and code blocks that sit
	// between them.
	beforeClose := trimmed[:len(trimmed)-len(fenceMark)]
	open := strings.LastIndex(beforeClose, fenceMark)
	if open < 0 || !strings.HasPrefix(beforeClose[open:], jsonFence) {
		return text
	}

	block := beforeClose[open:]
	if !strings.Contains(block, `"status"`) || !strings.Contains(block, `"phase"`) {
		return text
	}
	return strings.TrimRight(text[:open], whitespace)
}