feat(tools): pr_files_diff with caps

Returns per-file unified diff for a PR, capped at 20KB/file and 200KB
total response. Files exceeding per-file cap report truncated+omitted_lines;
files that would push the response over 200KB go to omitted_files.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Mathias Bergqvist
2026-05-04 22:57:11 +02:00
parent d3d0fed6b1
commit e95e87e8e3
5 changed files with 435 additions and 0 deletions

View File

@@ -64,3 +64,40 @@ func (c *Client) GetPullRequest(ctx context.Context, owner, repo string, index i
}
return &pr, nil
}
// PullRequestFile is one element of the JSON array that GetPullRequestFiles
// decodes: a file touched by a pull request together with its line counts.
type PullRequestFile struct {
// Filename is the file's path as reported under the "filename" key.
Filename string `json:"filename"`
Status string `json:"status"` // added | modified | deleted | renamed
// Additions and Deletions are the line counts reported for this file.
Additions int `json:"additions"`
Deletions int `json:"deletions"`
}
// GetPullRequestFiles lists the files changed by pull request index in
// owner/repo.
//
// The response body is first passed through MapStatus together with the HTTP
// status; any error it reports is returned unchanged. Otherwise the body is
// decoded as a JSON array of PullRequestFile.
//
// owner and repo are interpolated into the request path as-is, so callers
// must pass values that are safe as single path segments.
//
// NOTE(review): a single request is issued with no paging parameters —
// confirm whether the server pages this listing for large pull requests.
func (c *Client) GetPullRequestFiles(ctx context.Context, owner, repo string, index int) ([]PullRequestFile, error) {
	endpoint := fmt.Sprintf("/api/v1/repos/%s/%s/pulls/%d/files", owner, repo, index)

	payload, code, err := c.GetJSON(ctx, endpoint)
	if err != nil {
		return nil, err
	}

	statusErr := MapStatus(code, payload)
	if statusErr != nil {
		return nil, statusErr
	}

	var changed []PullRequestFile
	decodeErr := json.Unmarshal(payload, &changed)
	if decodeErr != nil {
		return nil, decodeErr
	}
	return changed, nil
}
// GetPullRequestDiff fetches the raw unified diff of pull request index in
// owner/repo and returns the response body untouched.
//
// The ".diff" endpoint serves text/plain rather than JSON, so the request
// goes through doRaw to bypass the JSON Accept header expectation. The status
// and body are still checked with MapStatus before the body is handed back.
//
// owner and repo are interpolated into the request path as-is.
func (c *Client) GetPullRequestDiff(ctx context.Context, owner, repo string, index int) ([]byte, error) {
	endpoint := fmt.Sprintf("/api/v1/repos/%s/%s/pulls/%d.diff", owner, repo, index)

	raw, err := c.doRaw(ctx, "GET", endpoint, nil)
	if err != nil {
		return nil, err
	}

	statusErr := MapStatus(raw.Status, raw.Body)
	if statusErr != nil {
		return nil, statusErr
	}
	return raw.Body, nil
}

View File

@@ -93,3 +93,46 @@ func TestGetPullRequest(t *testing.T) {
assert.Equal(t, "open", pr.State)
assert.True(t, pr.Draft)
}
// TestGetPullRequestFiles serves a two-element JSON file list from a stub
// server and checks that the client requests the expected path with GET and
// decodes every field of both entries.
func TestGetPullRequestFiles(t *testing.T) {
	const payload = `[
{"filename":"main.go","status":"modified","additions":10,"deletions":5},
{"filename":"README.md","status":"added","additions":20,"deletions":0}
]`

	handler := func(w http.ResponseWriter, r *http.Request) {
		assert.Equal(t, "/api/v1/repos/o/r/pulls/42/files", r.URL.Path)
		assert.Equal(t, http.MethodGet, r.Method)
		w.Header().Set("Content-Type", "application/json")
		_, _ = w.Write([]byte(payload))
	}
	srv := httptest.NewServer(http.HandlerFunc(handler))
	defer srv.Close()

	client := gitea.NewClient(srv.URL, "tok")
	got, err := client.GetPullRequestFiles(context.Background(), "o", "r", 42)
	require.NoError(t, err)
	require.Len(t, got, 2)

	// Expected values, in the order the stub server lists them.
	want := []struct {
		name      string
		status    string
		additions int
		deletions int
	}{
		{"main.go", "modified", 10, 5},
		{"README.md", "added", 20, 0},
	}
	for i, exp := range want {
		assert.Equal(t, exp.name, got[i].Filename)
		assert.Equal(t, exp.status, got[i].Status)
		assert.Equal(t, exp.additions, got[i].Additions)
		assert.Equal(t, exp.deletions, got[i].Deletions)
	}
}
// TestGetPullRequestDiff serves a small text/plain diff from a stub server and
// checks that the client requests the ".diff" path with GET and returns the
// body byte-for-byte.
func TestGetPullRequestDiff(t *testing.T) {
	want := []byte("diff --git a/main.go b/main.go\n--- a/main.go\n+++ b/main.go\n@@ -1,2 +1,3 @@\n+package main\n")

	handler := func(w http.ResponseWriter, r *http.Request) {
		assert.Equal(t, "/api/v1/repos/o/r/pulls/42.diff", r.URL.Path)
		assert.Equal(t, http.MethodGet, r.Method)
		w.Header().Set("Content-Type", "text/plain")
		_, _ = w.Write(want)
	}
	srv := httptest.NewServer(http.HandlerFunc(handler))
	defer srv.Close()

	client := gitea.NewClient(srv.URL, "tok")
	got, err := client.GetPullRequestDiff(context.Background(), "o", "r", 42)
	require.NoError(t, err)
	assert.Equal(t, want, got)
}

View File

@@ -0,0 +1,171 @@
package tools
import (
"bufio"
"bytes"
"context"
"encoding/json"
"fmt"
"strings"
"gitea.d-ma.be/mathias/gitea-mcp/internal/allowlist"
"gitea.d-ma.be/mathias/gitea-mcp/internal/gitea"
"gitea.d-ma.be/mathias/gitea-mcp/internal/registry"
)
// Size caps applied by PRFilesDiff.Call.
const (
// maxFileDiffBytes is the largest diff, in bytes, returned for a single file
// before it is truncated (20 KiB).
maxFileDiffBytes = 20 * 1024
// maxResponseBytes bounds the running size estimate summed over all returned
// file entries (200 KiB); files that would exceed it are omitted.
maxResponseBytes = 200 * 1024
)
// PRFilesDiff is the tool described by Descriptor as "pr_files_diff": it
// returns a pull request's per-file diffs subject to size caps.
type PRFilesDiff struct {
// c is the client used to fetch the PR's file list and raw diff.
c *gitea.Client
// a is the allowlist the requested owner is checked against.
a *allowlist.Allowlist
}
// NewPRFilesDiff returns a PRFilesDiff that fetches data through c and checks
// requested owners against a.
func NewPRFilesDiff(c *gitea.Client, a *allowlist.Allowlist) *PRFilesDiff {
	tool := new(PRFilesDiff)
	tool.c = c
	tool.a = a
	return tool
}
// Descriptor returns the tool's registry entry: its name, a one-line
// description, and the JSON Schema for its arguments (owner, name and a
// number >= 1, all required).
func (t *PRFilesDiff) Descriptor() registry.ToolDescriptor {
	// The schema text is kept exactly as written; only its placement changed.
	const inputSchema = `{
"type":"object",
"properties":{
"owner":{"type":"string"},
"name":{"type":"string"},
"number":{"type":"integer","minimum":1}
},
"required":["owner","name","number"]
}`

	var d registry.ToolDescriptor
	d.Name = "pr_files_diff"
	d.Description = "Get a pull request's per-file diff with size caps (20KB/file, 200KB total)."
	d.InputSchema = json.RawMessage(inputSchema)
	return d
}
// prFilesDiffArgs is the decoded argument object of a pr_files_diff call.
type prFilesDiffArgs struct {
// Owner is checked against the allowlist before any request is made.
Owner string `json:"owner"`
// Name is passed to the client as the repository name.
Name string `json:"name"`
// Number is the pull request index; Call rejects values below 1.
Number int `json:"number"`
}
// prFileDiffEntry is one element of the "files" array in the tool's response.
type prFileDiffEntry struct {
// Path is the filename taken from the PR's file listing.
Path string `json:"path"`
// Diff is this file's section of the unified diff, possibly truncated;
// it is empty when no section matched Path.
Diff string `json:"diff"`
// Truncated reports that Diff was cut because it exceeded maxFileDiffBytes.
Truncated bool `json:"truncated"`
// OmittedLines counts the diff lines cut off by truncation; it is left out
// of the JSON when zero.
OmittedLines int `json:"omitted_lines,omitempty"`
// Additions and Deletions are copied from the PR's file listing.
Additions int `json:"additions"`
Deletions int `json:"deletions"`
}
// Call runs the pr_files_diff tool for one request.
//
// raw is decoded into owner/name/number. The owner must pass the allowlist,
// name must be usable as a single URL path segment, and number must be >= 1;
// the name and number checks fail with an error wrapping gitea.ErrValidation.
// The tool then fetches the PR's file list and its raw unified diff and pairs
// each listed file with its section of the diff by path (a file with no
// matching section gets an empty diff).
//
// Two caps apply:
//   - a file whose diff exceeds maxFileDiffBytes is cut by capPRFileDiff and
//     reported with truncated=true and omitted_lines;
//   - a file whose entry would push the running size estimate past
//     maxResponseBytes is left out of "files", listed in "omitted_files",
//     and response_truncated is set. Later, smaller files may still fit.
//
// The size estimate is the diff's byte length plus a flat 200 bytes per
// entry, measured before JSON encoding.
func (t *PRFilesDiff) Call(ctx context.Context, raw json.RawMessage) (json.RawMessage, error) {
	var args prFilesDiffArgs
	if err := parseArgs(raw, &args); err != nil {
		return nil, err
	}
	if err := t.a.Check(args.Owner); err != nil {
		return nil, err
	}
	// Only the owner is allowlisted, and the client interpolates name into
	// the request path verbatim. Reject anything that is not a plain single
	// path segment so a crafted name cannot redirect the request elsewhere.
	if args.Name == "" || args.Name == "." || args.Name == ".." || strings.ContainsAny(args.Name, `/\?#%`) {
		return nil, fmt.Errorf("name must be a single path segment: %w", gitea.ErrValidation)
	}
	if args.Number < 1 {
		return nil, fmt.Errorf("number must be >= 1: %w", gitea.ErrValidation)
	}

	files, err := t.c.GetPullRequestFiles(ctx, args.Owner, args.Name, args.Number)
	if err != nil {
		return nil, err
	}
	rawDiff, err := t.c.GetPullRequestDiff(ctx, args.Owner, args.Name, args.Number)
	if err != nil {
		return nil, err
	}

	// Split unified diff by per-file headers ("diff --git a/path b/path").
	perFile := splitUnifiedDiff(rawDiff)

	out := struct {
		Files             []prFileDiffEntry `json:"files"`
		OmittedFiles      []string          `json:"omitted_files,omitempty"`
		ResponseTruncated bool              `json:"response_truncated"`
	}{
		// Non-nil so an empty result encodes as [] rather than null.
		Files: make([]prFileDiffEntry, 0, len(files)),
	}

	totalBytes := 0
	for _, f := range files {
		// Best-effort match by path; a missing key yields a nil (empty) diff.
		kept, omittedLines := capPRFileDiff(perFile[f.Filename], maxFileDiffBytes)
		entry := prFileDiffEntry{
			Path:         f.Filename,
			Diff:         string(kept),
			Truncated:    len(perFile[f.Filename]) > maxFileDiffBytes,
			OmittedLines: omittedLines,
			Additions:    f.Additions,
			Deletions:    f.Deletions,
		}

		// Response cap: if adding this entry would exceed it, record the
		// file as omitted and keep going.
		entryEstimate := len(entry.Diff) + 200 // small overhead for path + counts
		if totalBytes+entryEstimate > maxResponseBytes {
			out.OmittedFiles = append(out.OmittedFiles, f.Filename)
			out.ResponseTruncated = true
			continue
		}
		totalBytes += entryEstimate
		out.Files = append(out.Files, entry)
	}
	return textOK(out)
}

// capPRFileDiff returns diff unchanged (and 0) when it fits within limit
// bytes. Otherwise it returns a prefix of at most limit bytes and the number
// of lines that were dropped.
//
// The cut is placed after the last newline inside the limit so the kept text
// ends on a whole line. When the first limit bytes contain no newline at all
// (one very long line), the cut falls at limit, moved back by up to three
// bytes so it does not land inside a multi-byte UTF-8 sequence.
func capPRFileDiff(diff []byte, limit int) (kept []byte, omittedLines int) {
	if len(diff) <= limit {
		return diff, 0
	}

	cut := bytes.LastIndexByte(diff[:limit], '\n') + 1
	if cut == 0 {
		cut = limit
		// 10xxxxxx marks a UTF-8 continuation byte; a sequence has at most
		// three of them, so three steps back always reach a lead byte.
		for i := 0; i < 3 && cut > 0 && diff[cut]&0xC0 == 0x80; i++ {
			cut--
		}
	}

	rest := diff[cut:] // non-empty: cut <= limit < len(diff)
	omittedLines = bytes.Count(rest, []byte("\n"))
	if rest[len(rest)-1] != '\n' {
		omittedLines++ // final line without a terminator
	}
	return diff[:cut], omittedLines
}
// splitUnifiedDiff splits a multi-file unified diff into per-file sections.
//
// A section starts at a "diff --git a/<old> b/<new>" header line and runs up
// to the next such header (or the end of input). The returned map is keyed by
// the post-change path <new>, so a renamed file is found under its new name;
// for every other change <old> and <new> are identical. Each value is an
// independent copy of the section's bytes, preserved exactly: line
// terminators (including CRLF) are kept and no trailing newline is invented.
//
// Anything before the first header is ignored, as is any section whose header
// cannot be parsed (for example git's quoted form for unusual paths). If the
// same path appears twice, the later section wins.
func splitUnifiedDiff(d []byte) map[string][]byte {
	sections := map[string][]byte{}
	headerPrefix := []byte("diff --git ")

	var (
		name    string // key of the section being collected; "" means skip
		section bytes.Buffer
	)
	flush := func() {
		if name != "" {
			sections[name] = append([]byte(nil), section.Bytes()...)
		}
		section.Reset()
	}

	// ReadBytes has no line-length limit and returns each line with its
	// terminator, so the input is reproduced byte-for-byte. Reading from an
	// in-memory slice, the only error it can report is end of input.
	r := bufio.NewReader(bytes.NewReader(d))
	for {
		line, err := r.ReadBytes('\n')
		if len(line) > 0 {
			if bytes.HasPrefix(line, headerPrefix) {
				flush()
				name = diffHeaderNewPath(strings.TrimRight(string(line), "\r\n"))
			}
			if name != "" {
				section.Write(line)
			}
		}
		if err != nil {
			break
		}
	}
	flush()
	return sections
}

// diffHeaderNewPath extracts the post-change path from a
// "diff --git a/<old> b/<new>" header (given without its line terminator).
// It returns "" when the header does not have that shape.
func diffHeaderNewPath(header string) string {
	const (
		prefix = "diff --git a/"
		sep    = " b/"
	)
	if !strings.HasPrefix(header, prefix) {
		return ""
	}
	rest := header[len(prefix):]

	// Common case: the path is unchanged, so rest is "<path> b/<path>".
	// Checking for the mirrored halves handles paths that themselves
	// contain " b/".
	if n := (len(rest) - len(sep)) / 2; n > 0 && len(rest) == 2*n+len(sep) &&
		rest[n:n+len(sep)] == sep && rest[:n] == rest[n+len(sep):] {
		return rest[:n]
	}

	// Rename/copy: the two paths differ; split at the first separator.
	i := strings.Index(rest, sep)
	if i < 0 {
		return ""
	}
	return rest[i+len(sep):]
}

View File

@@ -0,0 +1,183 @@
package tools_test
import (
"context"
"encoding/json"
"fmt"
"net/http"
"net/http/httptest"
"strings"
"testing"
"gitea.d-ma.be/mathias/gitea-mcp/internal/allowlist"
"gitea.d-ma.be/mathias/gitea-mcp/internal/gitea"
"gitea.d-ma.be/mathias/gitea-mcp/internal/tools"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// buildDiff returns a synthetic unified diff with one section per entry in
// files. Every section has a "diff --git" header, ---/+++ lines, a single
// hunk header, and linesPerFile identical 12-byte added lines.
func buildDiff(files []string, linesPerFile int) string {
	var out strings.Builder
	for i := 0; i < len(files); i++ {
		name := files[i]
		out.WriteString(fmt.Sprintf("diff --git a/%s b/%s\n", name, name))
		out.WriteString(fmt.Sprintf("--- a/%s\n+++ b/%s\n", name, name))
		out.WriteString(fmt.Sprintf("@@ -0,0 +1,%d @@\n", linesPerFile))
		out.WriteString(strings.Repeat("+abcdefghij\n", linesPerFile))
	}
	return out.String()
}
// buildFilesJSON renders the file-listing response for files: a JSON array
// with one object per name, each marked "modified" with the given additions
// count and zero deletions.
func buildFilesJSON(files []string, additions int) string {
	var objects []string
	for _, name := range files {
		object := fmt.Sprintf(`{"filename":%q,"status":"modified","additions":%d,"deletions":0}`, name, additions)
		objects = append(objects, object)
	}
	return "[" + strings.Join(objects, ",") + "]"
}
// newPRFilesDiffServer starts a stub server for PR #1 of o/r. It answers the
// file-listing path with filesJSON (application/json) and the ".diff" path
// with rawDiff (text/plain). Any other path is reported as a test error and
// answered with 404. The caller is responsible for closing the server.
func newPRFilesDiffServer(t *testing.T, filesJSON, rawDiff string) *httptest.Server {
	t.Helper()

	handler := func(w http.ResponseWriter, r *http.Request) {
		var contentType, payload string
		switch r.URL.Path {
		case "/api/v1/repos/o/r/pulls/1/files":
			contentType, payload = "application/json", filesJSON
		case "/api/v1/repos/o/r/pulls/1.diff":
			contentType, payload = "text/plain", rawDiff
		default:
			t.Errorf("unexpected request: %s", r.URL.Path)
			w.WriteHeader(http.StatusNotFound)
			return
		}
		w.Header().Set("Content-Type", contentType)
		_, _ = w.Write([]byte(payload))
	}
	return httptest.NewServer(http.HandlerFunc(handler))
}
// TestPRFilesDiffSmall checks the no-cap path: two small files come back in
// full, with their counts, and nothing is truncated or omitted.
func TestPRFilesDiffSmall(t *testing.T) {
	// Two files, 10 added lines each: about 200 bytes of diff per file
	// including headers — well under the per-file and total caps.
	fileNames := []string{"main.go", "util.go"}
	rawDiff := buildDiff(fileNames, 10)
	filesJSON := buildFilesJSON(fileNames, 10)

	srv := newPRFilesDiffServer(t, filesJSON, rawDiff)
	defer srv.Close()

	tool := tools.NewPRFilesDiff(gitea.NewClient(srv.URL, "tok"), allowlist.New([]string{"o"}))
	result, err := tool.Call(context.Background(), json.RawMessage(`{"owner":"o","name":"r","number":1}`))
	require.NoError(t, err)

	var out struct {
		Files []struct {
			Path      string `json:"path"`
			Diff      string `json:"diff"`
			Truncated bool   `json:"truncated"`
			Additions int    `json:"additions"`
			Deletions int    `json:"deletions"`
		} `json:"files"`
		OmittedFiles      []string `json:"omitted_files"`
		ResponseTruncated bool     `json:"response_truncated"`
	}
	require.NoError(t, json.Unmarshal(result, &out))

	// require, not assert: a wrong file count must stop the test here with a
	// readable failure instead of letting later checks run on bad data.
	require.Len(t, out.Files, 2)
	assert.Empty(t, out.OmittedFiles)
	assert.False(t, out.ResponseTruncated)

	paths := make([]string, 0, len(out.Files))
	for _, f := range out.Files {
		assert.False(t, f.Truncated, "file %s should not be truncated", f.Path)
		assert.NotEmpty(t, f.Diff)
		assert.Equal(t, 10, f.Additions)
		assert.Equal(t, 0, f.Deletions)
		paths = append(paths, f.Path)
	}
	assert.ElementsMatch(t, fileNames, paths)
}
func TestPRFilesDiffPerFileTruncated(t *testing.T) {
// One file with a 30KB diff (each "+abcdefghij\n" = 12 bytes; 30KB / 12 ≈ 2560 lines).
fileNames := []string{"bigfile.go"}
linesPerFile := 2560 // ~30720 bytes > 20KB cap
rawDiff := buildDiff(fileNames, linesPerFile)
filesJSON := buildFilesJSON(fileNames, linesPerFile)
srv := newPRFilesDiffServer(t, filesJSON, rawDiff)
defer srv.Close()
tool := tools.NewPRFilesDiff(gitea.NewClient(srv.URL, "tok"), allowlist.New([]string{"o"}))
result, err := tool.Call(context.Background(), json.RawMessage(`{"owner":"o","name":"r","number":1}`))
require.NoError(t, err)
var out struct {
Files []struct {
Path string `json:"path"`
Diff string `json:"diff"`
Truncated bool `json:"truncated"`
OmittedLines int `json:"omitted_lines"`
Additions int `json:"additions"`
} `json:"files"`
ResponseTruncated bool `json:"response_truncated"`
}
require.NoError(t, json.Unmarshal(result, &out))
require.Len(t, out.Files, 1)
f := out.Files[0]
assert.Equal(t, "bigfile.go", f.Path)
assert.True(t, f.Truncated, "file should be truncated")
assert.Greater(t, f.OmittedLines, 0, "omitted_lines should be > 0")
assert.LessOrEqual(t, len(f.Diff), 20*1024+200, "diff should be capped near 20KB")
assert.False(t, out.ResponseTruncated)
}
func TestPRFilesDiffResponseCapped(t *testing.T) {
// 25 files × ~10KB diff each = ~250KB raw, well over the 200KB response cap.
// Each file: 850 lines × 12 bytes = 10200 bytes per file.
numFiles := 25
linesPerFile := 850
fileNames := make([]string, numFiles)
for i := range fileNames {
fileNames[i] = fmt.Sprintf("file%02d.go", i)
}
rawDiff := buildDiff(fileNames, linesPerFile)
filesJSON := buildFilesJSON(fileNames, linesPerFile)
srv := newPRFilesDiffServer(t, filesJSON, rawDiff)
defer srv.Close()
tool := tools.NewPRFilesDiff(gitea.NewClient(srv.URL, "tok"), allowlist.New([]string{"o"}))
result, err := tool.Call(context.Background(), json.RawMessage(`{"owner":"o","name":"r","number":1}`))
require.NoError(t, err)
var out struct {
Files []struct {
Path string `json:"path"`
} `json:"files"`
OmittedFiles []string `json:"omitted_files"`
ResponseTruncated bool `json:"response_truncated"`
}
require.NoError(t, json.Unmarshal(result, &out))
assert.True(t, out.ResponseTruncated, "response should be truncated")
assert.NotEmpty(t, out.OmittedFiles, "some files should be omitted")
assert.NotEmpty(t, out.Files, "some files should be included")
// Total files accounted for should equal numFiles.
totalAccountedFor := len(out.Files) + len(out.OmittedFiles)
assert.Equal(t, numFiles, totalAccountedFor)
}
func TestPRFilesDiffAllowlistRejects(t *testing.T) {
tool := tools.NewPRFilesDiff(gitea.NewClient("http://unused", ""), allowlist.New([]string{"allowed"}))
_, err := tool.Call(context.Background(), json.RawMessage(`{"owner":"evil","name":"r","number":1}`))
require.Error(t, err)
}
func TestPRFilesDiffRequiresValidNumber(t *testing.T) {
tool := tools.NewPRFilesDiff(gitea.NewClient("http://unused", ""), allowlist.New([]string{"o"}))
_, err := tool.Call(context.Background(), json.RawMessage(`{"owner":"o","name":"r","number":0}`))
require.Error(t, err)
assert.ErrorIs(t, err, gitea.ErrValidation)
}