feat(ingestion): add OpenAI-compatible LLM HTTP client with 429 retry

2026-04-22 22:29:24 +02:00
parent ae5a4d04f0
commit bf8a3fc11c
2 changed files with 205 additions and 0 deletions
--- a/ingestion/internal/llm/client.go
+++ b/ingestion/internal/llm/client.go
@@ -0,0 +1,119 @@
+package llm
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"strconv"
+	"strings"
+	"time"
+)
+
+// Client talks to the chat completions endpoint of an OpenAI-compatible API.
+// Values are normally obtained from New.
+type Client struct {
+	// baseURL is the API root that "/chat/completions" is appended to.
+	// apiKey, when non-empty, is sent as a bearer token.
+	// model is the model name placed in every request body.
+	baseURL, apiKey, model string
+
+	// httpClient performs the HTTP round trips.
+	httpClient *http.Client
+}
+
+// New returns a Client for the API rooted at baseURL. Trailing slashes are
+// stripped from baseURL so endpoint paths can be appended directly. apiKey and
+// model are stored as given, and timeout becomes the Timeout of the
+// underlying http.Client.
+func New(baseURL, apiKey, model string, timeout time.Duration) *Client {
+	c := new(Client)
+	c.baseURL = strings.TrimRight(baseURL, "/")
+	c.apiKey = apiKey
+	c.model = model
+	c.httpClient = &http.Client{Timeout: timeout}
+	return c
+}
+
+// Wire types used to encode the chat completions request and decode the reply.
+type (
+	// message is a single chat turn: the speaker's role and its text.
+	message struct {
+		Role    string `json:"role"`
+		Content string `json:"content"`
+	}
+
+	// chatRequest is the JSON body posted to the chat completions endpoint.
+	chatRequest struct {
+		Model       string    `json:"model"`
+		Messages    []message `json:"messages"`
+		Temperature float64   `json:"temperature"`
+	}
+
+	// chatResponse is the decoded part of the reply: a list of choices, each
+	// wrapping one message.
+	chatResponse struct {
+		Choices []struct {
+			Message message `json:"message"`
+		} `json:"choices"`
+	}
+)
+
+// Complete sends a system + user message pair to the chat completions
+// endpoint and returns the content of the first choice in the reply.
+//
+// If the first attempt is answered with HTTP 429, Complete waits and retries
+// exactly once. The wait honours the Retry-After header (delay-seconds or
+// HTTP-date form), falls back to 5s when the header is missing or invalid,
+// and is capped at one minute so a misbehaving server cannot stall the caller
+// for an arbitrary time. Cancelling ctx aborts the wait and returns ctx.Err().
+//
+// Any final status other than 200 is returned as an error that includes the
+// status code and the response body; a 200 response without choices is also
+// an error.
+func (c *Client) Complete(ctx context.Context, system, user string) (string, error) {
+	body := chatRequest{
+		Model: c.model,
+		Messages: []message{
+			{Role: "system", Content: system},
+			{Role: "user", Content: user},
+		},
+		Temperature: 0.2,
+	}
+	b, err := json.Marshal(body)
+	if err != nil {
+		return "", fmt.Errorf("marshal request: %w", err)
+	}
+
+	// do builds a fresh request per attempt: a request body reader cannot be
+	// replayed once the transport has consumed it.
+	do := func() (*http.Response, error) {
+		req, err := http.NewRequestWithContext(ctx, http.MethodPost, c.baseURL+"/chat/completions", bytes.NewReader(b))
+		if err != nil {
+			return nil, fmt.Errorf("build request: %w", err)
+		}
+		req.Header.Set("Content-Type", "application/json")
+		if c.apiKey != "" {
+			req.Header.Set("Authorization", "Bearer "+c.apiKey)
+		}
+		return c.httpClient.Do(req)
+	}
+
+	resp, err := do()
+	if err != nil {
+		return "", fmt.Errorf("call LLM: %w", err)
+	}
+
+	if resp.StatusCode == http.StatusTooManyRequests {
+		wait := retryDelay(resp.Header.Get("Retry-After"), time.Now())
+
+		// Drain a bounded amount of the body before closing so the transport
+		// can reuse the connection for the retry. This is best effort only, so
+		// the copy error is deliberately ignored.
+		_, _ = io.Copy(io.Discard, io.LimitReader(resp.Body, 64<<10))
+		resp.Body.Close()
+
+		// An explicit timer (rather than time.After) can be stopped when ctx
+		// is cancelled, so it does not stay pending for the whole wait.
+		timer := time.NewTimer(wait)
+		select {
+		case <-ctx.Done():
+			timer.Stop()
+			return "", ctx.Err()
+		case <-timer.C:
+		}
+
+		resp, err = do()
+		if err != nil {
+			return "", fmt.Errorf("retry LLM call: %w", err)
+		}
+	}
+	defer resp.Body.Close()
+
+	out, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return "", fmt.Errorf("read response: %w", err)
+	}
+	if resp.StatusCode != http.StatusOK {
+		return "", fmt.Errorf("LLM returned %d: %s", resp.StatusCode, out)
+	}
+
+	var cr chatResponse
+	if err := json.Unmarshal(out, &cr); err != nil {
+		return "", fmt.Errorf("parse response: %w", err)
+	}
+	if len(cr.Choices) == 0 {
+		return "", fmt.Errorf("LLM returned no choices")
+	}
+	return cr.Choices[0].Message.Content, nil
+}
+
+// retryDelay converts a Retry-After header value into the wait before the
+// single retry. header may be a non-negative integer number of seconds or an
+// HTTP-date, which is measured against now. A missing, negative or unparseable
+// value yields the 5s fallback; a date in the past yields zero. The result
+// never exceeds one minute, which also rules out integer overflow when a very
+// large number of seconds is converted to a time.Duration.
+func retryDelay(header string, now time.Time) time.Duration {
+	const (
+		fallback = 5 * time.Second
+		ceiling  = time.Minute
+	)
+	if header == "" {
+		return fallback
+	}
+	if secs, err := strconv.Atoi(header); err == nil {
+		switch {
+		case secs < 0:
+			return fallback
+		case secs >= int(ceiling/time.Second):
+			// Compare before multiplying so the conversion cannot overflow.
+			return ceiling
+		}
+		return time.Duration(secs) * time.Second
+	}
+	if at, err := http.ParseTime(header); err == nil {
+		wait := at.Sub(now)
+		switch {
+		case wait <= 0:
+			return 0
+		case wait > ceiling:
+			return ceiling
+		}
+		return wait
+	}
+	return fallback
+}