package exec

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"strings"
	"time"
)

// LiteLLMExecutor calls a LiteLLM-compatible /v1/chat/completions endpoint
// and returns the raw assistant message text.
type LiteLLMExecutor struct {
	// baseURL is the endpoint root; NewLiteLLM stores it without trailing slashes.
	baseURL string
	// apiKey is sent as a Bearer token when non-empty.
	apiKey string
	// httpClient performs the requests and carries the per-call timeout.
	httpClient *http.Client
}

// NewLiteLLM creates a LiteLLMExecutor.
//
// timeout applies to the full HTTP round-trip per call. A zero or negative
// timeout selects the 120-second default; a negative value would otherwise
// reach http.Client, which treats it as "no timeout".
//
// Trailing slashes on baseURL are dropped so that appending the endpoint path
// never produces a double slash.
func NewLiteLLM(baseURL, apiKey string, timeout time.Duration) *LiteLLMExecutor {
	const defaultTimeout = 120 * time.Second
	if timeout <= 0 {
		timeout = defaultTimeout
	}
	return &LiteLLMExecutor{
		baseURL:    strings.TrimRight(baseURL, "/"),
		apiKey:     apiKey,
		httpClient: &http.Client{Timeout: timeout},
	}
}

// litellmMessage is one chat message in a request or a response choice.
type litellmMessage struct {
	Role    string `json:"role"`
	Content string `json:"content"`
}

// litellmRequest is the JSON body posted to /v1/chat/completions.
type litellmRequest struct {
	Model    string           `json:"model"`
	Messages []litellmMessage `json:"messages"`
}

// litellmChoice is one entry of the "choices" array in a response.
type litellmChoice struct {
	Message litellmMessage `json:"message"`
}

// litellmResponse is the subset of the response body that Complete reads.
type litellmResponse struct {
	Choices []litellmChoice `json:"choices"`
}

// Complete sends system+user messages to the given model and returns the raw
// assistant text along with the round-trip duration in milliseconds.
//
// The duration is measured from just before the request is sent until the
// HTTP client returns the response, i.e. before the body is decoded. On any
// error the returned text is empty and the duration is 0.
//
// A trailing legacy result-JSON block is removed from the assistant text (see
// stripResultJSON). Only the first choice is used. Any status other than
// 200 OK is an error; a bounded prefix of the response body, when present, is
// appended to the error message to aid diagnosis.
func (e *LiteLLMExecutor) Complete(ctx context.Context, model, system, user string) (string, int64, error) {
	// Upper bound on how much of a failed response is quoted in the error.
	const maxErrorBody = 4 << 10

	payload, err := json.Marshal(litellmRequest{
		Model: model,
		Messages: []litellmMessage{
			{Role: "system", Content: system},
			{Role: "user", Content: user},
		},
	})
	if err != nil {
		return "", 0, fmt.Errorf("litellm: marshal request: %w", err)
	}

	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, e.baseURL+"/v1/chat/completions", bytes.NewReader(payload))
	if err != nil {
		return "", 0, fmt.Errorf("litellm: create request: %w", err)
	}
	httpReq.Header.Set("Content-Type", "application/json")
	if e.apiKey != "" {
		httpReq.Header.Set("Authorization", "Bearer "+e.apiKey)
	}

	t0 := time.Now()
	resp, err := e.httpClient.Do(httpReq)
	if err != nil {
		return "", 0, fmt.Errorf("litellm: request failed: %w", err)
	}
	defer resp.Body.Close() //nolint:errcheck
	durationMs := time.Since(t0).Milliseconds()

	if resp.StatusCode != http.StatusOK {
		// Best effort: a read failure here must not mask the status error,
		// so the read error is deliberately ignored.
		raw, _ := io.ReadAll(io.LimitReader(resp.Body, maxErrorBody))
		if detail := strings.TrimSpace(string(raw)); detail != "" {
			return "", 0, fmt.Errorf("litellm: server returned status %d: %s", resp.StatusCode, detail)
		}
		return "", 0, fmt.Errorf("litellm: server returned status %d", resp.StatusCode)
	}

	var chatResp litellmResponse
	if err := json.NewDecoder(resp.Body).Decode(&chatResp); err != nil {
		return "", 0, fmt.Errorf("litellm: decode response: %w", err)
	}
	if len(chatResp.Choices) == 0 {
		return "", 0, fmt.Errorf("litellm: no choices in response")
	}

	return stripResultJSON(chatResp.Choices[0].Message.Content), durationMs, nil
}

// stripResultJSON removes a trailing ```json block that matches the old
// structured result schema (containing "status" and "phase" keys). Some local
// models produce correct markdown prose but then append the old JSON format
// out of habit.
//
// The text is returned unchanged unless, ignoring trailing whitespace, it ends
// with a closing fence whose nearest preceding fence is a ```json opener and
// the enclosed block mentions both keys. When the block is removed, whitespace
// left at the end of the remaining prose is trimmed as well.
func stripResultJSON(text string) string {
	const (
		closeFence = "```"
		openFence  = "```json"
		blanks     = " \t\r\n"
	)

	// Must end with a closing fence once trailing whitespace is ignored.
	trimmed := strings.TrimRight(text, blanks)
	if !strings.HasSuffix(trimmed, closeFence) {
		return text
	}
	beforeClose := trimmed[:len(trimmed)-len(closeFence)]

	// The closing fence pairs with the nearest fence before it. Searching
	// further back for "```json" could swallow prose and unrelated fenced
	// blocks that follow an earlier JSON block.
	open := strings.LastIndex(beforeClose, closeFence)
	if open < 0 || !strings.HasPrefix(trimmed[open:], openFence) {
		return text
	}

	block := trimmed[open:]
	if !strings.Contains(block, `"status"`) || !strings.Contains(block, `"phase"`) {
		return text
	}
	return strings.TrimRight(text[:open], blanks)
}