// hyperguild/internal/exec/litellm.go

package exec

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"net/http"
	"strings"
	"time"
)

// LiteLLMExecutor calls a LiteLLM-compatible /v1/chat/completions endpoint.
// Local models are expected to return a JSON object matching the Result schema
// as their response content — no envelope.
type LiteLLMExecutor struct {
	baseURL    string       // server root; NewLiteLLM stores it without trailing slashes
	apiKey     string       // bearer token; empty means no Authorization header is sent
	httpClient *http.Client // client reused for every call; carries the per-call timeout
}

// NewLiteLLM creates a LiteLLMExecutor.
//
// baseURL is the server root (for example "http://localhost:4000"). Trailing
// slashes are trimmed so that appending the "/v1/chat/completions" path never
// yields a double slash, which servers may reject or answer with a redirect.
// apiKey may be empty. timeout applies to the full HTTP round-trip per call;
// as with any http.Client, zero means no timeout.
func NewLiteLLM(baseURL, apiKey string, timeout time.Duration) *LiteLLMExecutor {
	return &LiteLLMExecutor{
		baseURL:    strings.TrimRight(baseURL, "/"),
		apiKey:     apiKey,
		httpClient: &http.Client{Timeout: timeout},
	}
}

// litellmMessage is a single chat message as it appears on the wire: a role
// plus its text content. Run sends "system" and "user" messages in the request
// and reads the reply's content from the same shape in each response choice.
type litellmMessage struct {
	Role    string `json:"role"`
	Content string `json:"content"`
}

// litellmRequest is the JSON body POSTed to /v1/chat/completions: the model
// name and the ordered list of chat messages.
type litellmRequest struct {
	Model    string           `json:"model"`
	Messages []litellmMessage `json:"messages"`
}

// litellmChoice is one entry of the response's "choices" array. Only the
// "message" field is decoded; any other fields the server sends are ignored.
type litellmChoice struct {
	Message litellmMessage `json:"message"`
}

// litellmResponse is the subset of the chat-completions response body that
// Run decodes: the list of choices. Run uses only the first choice.
type litellmResponse struct {
	Choices []litellmChoice `json:"choices"`
}

// Run sends req to the LiteLLM server and returns the Result decoded from the
// first choice's message content.
//
// The skill prompt travels as the "system" message and the task prompt as the
// "user" message. The Authorization header is only set when an API key was
// configured. ctx is attached to the outgoing HTTP request.
//
// A non-nil error is returned for a request that cannot be built or sent, any
// status other than 200, a response body that does not decode, an empty
// choices list, message content that is not valid Result JSON, or a Result
// that fails Validate. The Orchestrator treats all of these as automatic
// escalation triggers.
func (e *LiteLLMExecutor) Run(ctx context.Context, req Request) (Result, error) {
	payload, err := json.Marshal(litellmRequest{
		Model: req.Model,
		Messages: []litellmMessage{
			{Role: "system", Content: req.SkillPrompt},
			{Role: "user", Content: req.TaskPrompt},
		},
	})
	if err != nil {
		return Result{}, fmt.Errorf("litellm: marshal request: %w", err)
	}

	endpoint := e.baseURL + "/v1/chat/completions"
	call, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewReader(payload))
	if err != nil {
		return Result{}, fmt.Errorf("litellm: create request: %w", err)
	}
	call.Header.Set("Content-Type", "application/json")
	if e.apiKey != "" {
		call.Header.Set("Authorization", "Bearer "+e.apiKey)
	}

	resp, err := e.httpClient.Do(call)
	if err != nil {
		return Result{}, fmt.Errorf("litellm: request failed: %w", err)
	}
	defer resp.Body.Close() //nolint:errcheck

	if resp.StatusCode != http.StatusOK {
		return Result{}, fmt.Errorf("litellm: server returned status %d", resp.StatusCode)
	}

	var completion litellmResponse
	if err := json.NewDecoder(resp.Body).Decode(&completion); err != nil {
		return Result{}, fmt.Errorf("litellm: decode response: %w", err)
	}
	if len(completion.Choices) == 0 {
		return Result{}, fmt.Errorf("litellm: no choices in response")
	}

	// The model's reply is itself a JSON document; decode it a second time.
	raw := completion.Choices[0].Message.Content
	var out Result
	if err := json.Unmarshal([]byte(raw), &out); err != nil {
		return Result{}, fmt.Errorf("litellm: parse result JSON: %w — content: %s", err, raw)
	}
	if err := out.Validate(); err != nil {
		return Result{}, fmt.Errorf("litellm: invalid result: %w", err)
	}
	return out, nil
}