diff --git a/cmd/gitea-mcp/main.go b/cmd/gitea-mcp/main.go
index dde8a6f..35adbde 100644
--- a/cmd/gitea-mcp/main.go
+++ b/cmd/gitea-mcp/main.go
@@ -40,6 +40,7 @@ func main() {
 	reg.Register(tools.NewIssueCreate(giteaClient, ownerAllow))
 	reg.Register(tools.NewIssueComment(giteaClient, ownerAllow))
 	reg.Register(tools.NewPRComment(giteaClient, ownerAllow))
+	reg.Register(tools.NewPRFilesDiff(giteaClient, ownerAllow))
 
 	mcpSrv := mcp.NewServer(mcp.ServerOptions{
 		Registry: reg,
diff --git a/internal/gitea/pulls.go b/internal/gitea/pulls.go
index 4573aa0..a01b464 100644
--- a/internal/gitea/pulls.go
+++ b/internal/gitea/pulls.go
@@ -64,3 +64,51 @@ func (c *Client) GetPullRequest(ctx context.Context, owner, repo string, index i
 	}
 	return &pr, nil
 }
+
+// PullRequestFile is one entry of a pull request's changed-files listing as decoded
+// from the JSON returned by the pulls/{index}/files endpoint.
+type PullRequestFile struct {
+	Filename  string `json:"filename"`
+	Status    string `json:"status"` // added | modified | deleted | renamed
+	Additions int    `json:"additions"`
+	Deletions int    `json:"deletions"`
+}
+
+// GetPullRequestFiles fetches the changed-files listing of pull request index in
+// owner/repo. The status and body are handed to MapStatus first; only when that reports
+// no error is the body decoded as a JSON array of PullRequestFile.
+//
+// NOTE(review): a single request is issued; if the endpoint paginates, large pull
+// requests may come back incomplete — confirm against the Gitea API.
+func (c *Client) GetPullRequestFiles(ctx context.Context, owner, repo string, index int) ([]PullRequestFile, error) {
+	endpoint := fmt.Sprintf("/api/v1/repos/%s/%s/pulls/%d/files", owner, repo, index)
+
+	payload, code, err := c.GetJSON(ctx, endpoint)
+	if err != nil {
+		return nil, err
+	}
+	if statusErr := MapStatus(code, payload); statusErr != nil {
+		return nil, statusErr
+	}
+
+	var changed []PullRequestFile
+	if decodeErr := json.Unmarshal(payload, &changed); decodeErr != nil {
+		return nil, decodeErr
+	}
+	return changed, nil
+}
+
+// GetPullRequestDiff returns the raw unified diff of a pull request. The endpoint serves
+// text/plain rather than JSON, so the request goes through doRaw to bypass the JSON
+// Accept header expectation.
+func (c *Client) GetPullRequestDiff(ctx context.Context, owner, repo string, index int) ([]byte, error) {
+	p := fmt.Sprintf("/api/v1/repos/%s/%s/pulls/%d.diff", owner, repo, index)
+	resp, err := c.doRaw(ctx, "GET", p, nil)
+	if err != nil {
+		return nil, err
+	}
+	if err := MapStatus(resp.Status, resp.Body); err != nil {
+		return nil, err
+	}
+	return resp.Body, nil
+}
diff --git a/internal/gitea/pulls_test.go b/internal/gitea/pulls_test.go
index cd77f49..f2dbb71 100644
--- a/internal/gitea/pulls_test.go
+++ b/internal/gitea/pulls_test.go
@@ -93,3 +93,46 @@ func TestGetPullRequest(t *testing.T) {
 	assert.Equal(t, "open", pr.State)
 	assert.True(t, pr.Draft)
 }
+
+func TestGetPullRequestFiles(t *testing.T) {
+	filesJSON := `[
+		{"filename":"main.go","status":"modified","additions":10,"deletions":5},
+		{"filename":"README.md","status":"added","additions":20,"deletions":0}
+	]`
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		assert.Equal(t, "/api/v1/repos/o/r/pulls/42/files", r.URL.Path)
+		assert.Equal(t, http.MethodGet, r.Method)
+		w.Header().Set("Content-Type", "application/json")
+		_, _ = w.Write([]byte(filesJSON))
+	}))
+	defer srv.Close()
+
+	c := gitea.NewClient(srv.URL, "tok")
+	files, err := c.GetPullRequestFiles(context.Background(), "o", "r", 42)
+	require.NoError(t, err)
+	require.Len(t, files, 2)
+	assert.Equal(t, "main.go", files[0].Filename)
+	assert.Equal(t, "modified", files[0].Status)
+	assert.Equal(t, 10, files[0].Additions)
+	assert.Equal(t, 5, files[0].Deletions)
+	assert.Equal(t, "README.md", files[1].Filename)
+	assert.Equal(t, "added", files[1].Status)
+	assert.Equal(t, 20, files[1].Additions)
+	assert.Equal(t, 0, files[1].Deletions)
+}
+
+func TestGetPullRequestDiff(t *testing.T) {
+	rawDiff := "diff --git a/main.go b/main.go\n--- a/main.go\n+++ b/main.go\n@@ -1,2 +1,3 @@\n+package main\n"
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		assert.Equal(t, "/api/v1/repos/o/r/pulls/42.diff", r.URL.Path)
+		assert.Equal(t, http.MethodGet, r.Method)
+		w.Header().Set("Content-Type", "text/plain")
+		_, _ = w.Write([]byte(rawDiff))
+	}))
+	defer srv.Close()
+
+	c := gitea.NewClient(srv.URL, "tok")
+	diff, err := c.GetPullRequestDiff(context.Background(), "o", "r", 42)
+	require.NoError(t, err)
+	assert.Equal(t, []byte(rawDiff), diff)
+}
diff --git a/internal/tools/pr_files_diff.go b/internal/tools/pr_files_diff.go
new file mode 100644
index 0000000..74a6945
--- /dev/null
+++ b/internal/tools/pr_files_diff.go
@@ -0,0 +1,248 @@
+package tools
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"fmt"
+	"strings"
+	"unicode/utf8"
+
+	"gitea.d-ma.be/mathias/gitea-mcp/internal/allowlist"
+	"gitea.d-ma.be/mathias/gitea-mcp/internal/gitea"
+	"gitea.d-ma.be/mathias/gitea-mcp/internal/registry"
+)
+
+const (
+	// maxFileDiffBytes caps the diff text returned for one file.
+	maxFileDiffBytes = 20 * 1024
+	// maxResponseBytes caps the estimated total size of all returned entries.
+	maxResponseBytes = 200 * 1024
+)
+
+// PRFilesDiff is the pr_files_diff tool: it returns a pull request's diff split per file,
+// bounded by a per-file cap and a whole-response cap.
+type PRFilesDiff struct {
+	c *gitea.Client
+	a *allowlist.Allowlist
+}
+
+// NewPRFilesDiff builds the tool from a Gitea client and the owner allowlist.
+func NewPRFilesDiff(c *gitea.Client, a *allowlist.Allowlist) *PRFilesDiff {
+	return &PRFilesDiff{c: c, a: a}
+}
+
+// Descriptor returns the tool name, description and JSON input schema.
+func (t *PRFilesDiff) Descriptor() registry.ToolDescriptor {
+	return registry.ToolDescriptor{
+		Name:        "pr_files_diff",
+		Description: "Get a pull request's per-file diff with size caps (20KB/file, 200KB total).",
+		InputSchema: json.RawMessage(`{
+			"type":"object",
+			"properties":{
+				"owner":{"type":"string"},
+				"name":{"type":"string"},
+				"number":{"type":"integer","minimum":1}
+			},
+			"required":["owner","name","number"]
+		}`),
+	}
+}
+
+type prFilesDiffArgs struct {
+	Owner  string `json:"owner"`
+	Name   string `json:"name"`
+	Number int    `json:"number"`
+}
+
+type prFileDiffEntry struct {
+	Path         string `json:"path"`
+	Diff         string `json:"diff"`
+	Truncated    bool   `json:"truncated"`
+	OmittedLines int    `json:"omitted_lines,omitempty"`
+	Additions    int    `json:"additions"`
+	Deletions    int    `json:"deletions"`
+}
+
+// Call validates the arguments, fetches the pull request's file list and raw diff, and
+// returns one entry per listed file as JSON.
+//
+// A file diff longer than maxFileDiffBytes is cut at a line boundary and flagged
+// Truncated, with OmittedLines holding the number of dropped lines. Once the estimated
+// response size would exceed maxResponseBytes, remaining files are listed by name in
+// omitted_files and response_truncated is set.
+func (t *PRFilesDiff) Call(ctx context.Context, raw json.RawMessage) (json.RawMessage, error) {
+	var args prFilesDiffArgs
+	if err := parseArgs(raw, &args); err != nil {
+		return nil, err
+	}
+	if err := t.a.Check(args.Owner); err != nil {
+		return nil, err
+	}
+	if args.Number < 1 {
+		return nil, fmt.Errorf("number must be >= 1: %w", gitea.ErrValidation)
+	}
+	// The name is interpolated into the request path; anything that could address another
+	// path segment (e.g. "../other-owner/repo") would sidestep the owner allowlist.
+	if !validRepoName(args.Name) {
+		return nil, fmt.Errorf("name is not a valid repository name: %w", gitea.ErrValidation)
+	}
+
+	files, err := t.c.GetPullRequestFiles(ctx, args.Owner, args.Name, args.Number)
+	if err != nil {
+		return nil, err
+	}
+
+	rawDiff, err := t.c.GetPullRequestDiff(ctx, args.Owner, args.Name, args.Number)
+	if err != nil {
+		return nil, err
+	}
+
+	// Split the unified diff into per-file sections keyed by path.
+	perFile := splitUnifiedDiff(rawDiff)
+
+	out := struct {
+		Files             []prFileDiffEntry `json:"files"`
+		OmittedFiles      []string          `json:"omitted_files,omitempty"`
+		ResponseTruncated bool              `json:"response_truncated"`
+	}{
+		Files: make([]prFileDiffEntry, 0, len(files)),
+	}
+
+	totalBytes := 0
+	for _, f := range files {
+		// Best-effort lookup by path; a file without a section gets an empty diff.
+		full := perFile[f.Filename]
+		kept, omitted := truncateDiff(full, maxFileDiffBytes)
+		entry := prFileDiffEntry{
+			Path:         f.Filename,
+			Diff:         string(kept),
+			Truncated:    len(kept) < len(full),
+			OmittedLines: omitted,
+			Additions:    f.Additions,
+			Deletions:    f.Deletions,
+		}
+
+		// Response cap: if adding this entry would exceed it, list the file as omitted.
+		entryEstimate := len(entry.Diff) + 200 // small overhead for path + counts
+		if totalBytes+entryEstimate > maxResponseBytes {
+			out.OmittedFiles = append(out.OmittedFiles, f.Filename)
+			out.ResponseTruncated = true
+			continue
+		}
+		totalBytes += entryEstimate
+		out.Files = append(out.Files, entry)
+	}
+
+	return textOK(out)
+}
+
+// validRepoName reports whether name can be used as a single URL path segment.
+func validRepoName(name string) bool {
+	if name == "" || name == "." || name == ".." {
+		return false
+	}
+	return !strings.ContainsAny(name, `/\?#%`)
+}
+
+// truncateDiff returns d cut to at most limit bytes plus the number of lines dropped.
+// The cut is placed after the last newline inside the limit so no line is split; when
+// the first line alone exceeds the limit, the cut backs off to a UTF-8 rune boundary.
+func truncateDiff(d []byte, limit int) (kept []byte, omittedLines int) {
+	if limit < 0 {
+		limit = 0
+	}
+	if len(d) <= limit {
+		return d, 0
+	}
+	cut := bytes.LastIndexByte(d[:limit], '\n') + 1
+	if cut == 0 {
+		cut = limit
+		for i := 0; i < utf8.UTFMax-1 && cut > 0 && !utf8.RuneStart(d[cut]); i++ {
+			cut--
+		}
+	}
+	omittedLines = bytes.Count(d[cut:], []byte("\n"))
+	if d[len(d)-1] != '\n' {
+		omittedLines++ // last line has no terminating newline
+	}
+	return d[:cut], omittedLines
+}
+
+// splitUnifiedDiff splits a git unified diff into per-file sections and returns a map
+// from path to that file's section. Each section starts at its
+// "diff --git a/<old> b/<new>" line and is keyed by the new path, so renamed and copied
+// files match the name the files listing reports. Lines are scanned directly from the
+// byte slice, so there is no maximum line length. Quoted (escaped) paths are not decoded.
+func splitUnifiedDiff(d []byte) map[string][]byte {
+	var (
+		gitHeader = []byte("diff --git ")
+		renameTo  = []byte("rename to ")
+		copyTo    = []byte("copy to ")
+	)
+	m := map[string][]byte{}
+
+	var (
+		currentFile string
+		current     bytes.Buffer
+	)
+	flush := func() {
+		if currentFile != "" {
+			m[currentFile] = append([]byte(nil), current.Bytes()...)
+		}
+		current.Reset()
+	}
+
+	for len(d) > 0 {
+		var line []byte
+		if i := bytes.IndexByte(d, '\n'); i >= 0 {
+			line, d = d[:i], d[i+1:]
+		} else {
+			line, d = d, nil
+		}
+		line = bytes.TrimSuffix(line, []byte("\r"))
+
+		switch {
+		case bytes.HasPrefix(line, gitHeader):
+			flush()
+			currentFile = diffHeaderPath(line)
+		case currentFile == "":
+			// Outside any recognised file section; nothing to refine.
+		case bytes.HasPrefix(line, renameTo) && len(line) > len(renameTo):
+			// The extended header states the new path unambiguously.
+			currentFile = string(line[len(renameTo):])
+		case bytes.HasPrefix(line, copyTo) && len(line) > len(copyTo):
+			currentFile = string(line[len(copyTo):])
+		}
+		if currentFile != "" {
+			current.Write(line)
+			current.WriteByte('\n')
+		}
+	}
+	flush()
+	return m
+}
+
+// diffHeaderPath extracts the new ("b/") path from a "diff --git a/<old> b/<new>" line,
+// or returns "" when the line does not have that shape.
+func diffHeaderPath(line []byte) string {
+	const sep = " b/"
+	prefix := []byte("diff --git a/")
+	if !bytes.HasPrefix(line, prefix) {
+		return ""
+	}
+	rest := line[len(prefix):]
+	// Unrenamed files read "<p> b/<p>": split exactly in the middle, which stays correct
+	// even when the path itself contains " b/".
+	if n := len(rest) - len(sep); n > 0 && n%2 == 0 {
+		half := n / 2
+		if string(rest[half:half+len(sep)]) == sep && bytes.Equal(rest[:half], rest[half+len(sep):]) {
+			return string(rest[:half])
+		}
+	}
+	// Renamed or copied: old and new differ, so take what follows the last separator.
+	if i := bytes.LastIndex(rest, []byte(sep)); i >= 0 {
+		return string(rest[i+len(sep):])
+	}
+	return ""
+}
diff --git a/internal/tools/pr_files_diff_test.go b/internal/tools/pr_files_diff_test.go
new file mode 100644
index 0000000..5824893
--- /dev/null
+++ b/internal/tools/pr_files_diff_test.go
@@ -0,0 +1,225 @@
+package tools_test
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"testing"
+
+	"gitea.d-ma.be/mathias/gitea-mcp/internal/allowlist"
+	"gitea.d-ma.be/mathias/gitea-mcp/internal/gitea"
+	"gitea.d-ma.be/mathias/gitea-mcp/internal/tools"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// buildDiff builds a synthetic unified diff for a set of files.
+// Each file gets `linesPerFile` added lines.
+func buildDiff(files []string, linesPerFile int) string {
+	var sb strings.Builder
+	for _, f := range files {
+		fmt.Fprintf(&sb, "diff --git a/%s b/%s\n", f, f)
+		fmt.Fprintf(&sb, "--- a/%s\n+++ b/%s\n", f, f)
+		fmt.Fprintf(&sb, "@@ -0,0 +1,%d @@\n", linesPerFile)
+		sb.WriteString(strings.Repeat("+abcdefghij\n", linesPerFile))
+	}
+	return sb.String()
+}
+
+// buildFilesJSON builds the JSON list of PullRequestFile objects.
+func buildFilesJSON(files []string, additions int) string {
+	entries := make([]string, len(files))
+	for i, f := range files {
+		entries[i] = fmt.Sprintf(`{"filename":%q,"status":"modified","additions":%d,"deletions":0}`, f, additions)
+	}
+	return "[" + strings.Join(entries, ",") + "]"
+}
+
+// newPRFilesDiffServer creates a test server that serves both the /files and .diff endpoints.
+func newPRFilesDiffServer(t *testing.T, filesJSON, rawDiff string) *httptest.Server {
+	t.Helper()
+	return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		switch {
+		case r.URL.Path == "/api/v1/repos/o/r/pulls/1/files":
+			w.Header().Set("Content-Type", "application/json")
+			_, _ = w.Write([]byte(filesJSON))
+		case r.URL.Path == "/api/v1/repos/o/r/pulls/1.diff":
+			w.Header().Set("Content-Type", "text/plain")
+			_, _ = w.Write([]byte(rawDiff))
+		default:
+			t.Errorf("unexpected request: %s", r.URL.Path)
+			w.WriteHeader(http.StatusNotFound)
+		}
+	}))
+}
+
+func TestPRFilesDiffSmall(t *testing.T) {
+	// Two files, each well under the per-file and total caps.
+	fileNames := []string{"main.go", "util.go"}
+	// 10 added lines of 12 bytes each, plus the per-file headers.
+	rawDiff := buildDiff(fileNames, 10)
+	filesJSON := buildFilesJSON(fileNames, 10)
+
+	srv := newPRFilesDiffServer(t, filesJSON, rawDiff)
+	defer srv.Close()
+
+	tool := tools.NewPRFilesDiff(gitea.NewClient(srv.URL, "tok"), allowlist.New([]string{"o"}))
+	result, err := tool.Call(context.Background(), json.RawMessage(`{"owner":"o","name":"r","number":1}`))
+	require.NoError(t, err)
+
+	var out struct {
+		Files []struct {
+			Path      string `json:"path"`
+			Diff      string `json:"diff"`
+			Truncated bool   `json:"truncated"`
+			Additions int    `json:"additions"`
+			Deletions int    `json:"deletions"`
+		} `json:"files"`
+		OmittedFiles      []string `json:"omitted_files"`
+		ResponseTruncated bool     `json:"response_truncated"`
+	}
+	require.NoError(t, json.Unmarshal(result, &out))
+
+	require.Len(t, out.Files, 2)
+	assert.Empty(t, out.OmittedFiles)
+	assert.False(t, out.ResponseTruncated)
+
+	for _, f := range out.Files {
+		assert.False(t, f.Truncated, "file %s should not be truncated", f.Path)
+		assert.NotEmpty(t, f.Diff)
+		assert.Equal(t, 10, f.Additions)
+		assert.Equal(t, 0, f.Deletions)
+	}
+	paths := []string{out.Files[0].Path, out.Files[1].Path}
+	assert.ElementsMatch(t, fileNames, paths)
+}
+
+func TestPRFilesDiffPerFileTruncated(t *testing.T) {
+	// One file with a 30KB diff (each "+abcdefghij\n" = 12 bytes; 30KB / 12 ≈ 2560 lines).
+	fileNames := []string{"bigfile.go"}
+	linesPerFile := 2560 // ~30720 bytes > 20KB cap
+	rawDiff := buildDiff(fileNames, linesPerFile)
+	filesJSON := buildFilesJSON(fileNames, linesPerFile)
+
+	srv := newPRFilesDiffServer(t, filesJSON, rawDiff)
+	defer srv.Close()
+
+	tool := tools.NewPRFilesDiff(gitea.NewClient(srv.URL, "tok"), allowlist.New([]string{"o"}))
+	result, err := tool.Call(context.Background(), json.RawMessage(`{"owner":"o","name":"r","number":1}`))
+	require.NoError(t, err)
+
+	var out struct {
+		Files []struct {
+			Path         string `json:"path"`
+			Diff         string `json:"diff"`
+			Truncated    bool   `json:"truncated"`
+			OmittedLines int    `json:"omitted_lines"`
+			Additions    int    `json:"additions"`
+		} `json:"files"`
+		ResponseTruncated bool `json:"response_truncated"`
+	}
+	require.NoError(t, json.Unmarshal(result, &out))
+
+	require.Len(t, out.Files, 1)
+	f := out.Files[0]
+	assert.Equal(t, "bigfile.go", f.Path)
+	assert.True(t, f.Truncated, "file should be truncated")
+	assert.LessOrEqual(t, len(f.Diff), 20*1024, "diff must not exceed the 20KB cap")
+	assert.True(t, strings.HasSuffix(f.Diff, "\n"), "diff should be cut at a line boundary")
+	// Kept plus omitted lines must add up to the whole section: 4 header lines + the body.
+	assert.Equal(t, 4+linesPerFile, strings.Count(f.Diff, "\n")+f.OmittedLines)
+	assert.False(t, out.ResponseTruncated)
+}
+
+func TestPRFilesDiffResponseCapped(t *testing.T) {
+	// 25 files × ~10KB diff each = ~250KB raw, well over the 200KB response cap.
+	// Each file: 850 lines × 12 bytes = 10200 bytes per file.
+	numFiles := 25
+	linesPerFile := 850
+	fileNames := make([]string, numFiles)
+	for i := range fileNames {
+		fileNames[i] = fmt.Sprintf("file%02d.go", i)
+	}
+	rawDiff := buildDiff(fileNames, linesPerFile)
+	filesJSON := buildFilesJSON(fileNames, linesPerFile)
+
+	srv := newPRFilesDiffServer(t, filesJSON, rawDiff)
+	defer srv.Close()
+
+	tool := tools.NewPRFilesDiff(gitea.NewClient(srv.URL, "tok"), allowlist.New([]string{"o"}))
+	result, err := tool.Call(context.Background(), json.RawMessage(`{"owner":"o","name":"r","number":1}`))
+	require.NoError(t, err)
+
+	var out struct {
+		Files []struct {
+			Path string `json:"path"`
+		} `json:"files"`
+		OmittedFiles      []string `json:"omitted_files"`
+		ResponseTruncated bool     `json:"response_truncated"`
+	}
+	require.NoError(t, json.Unmarshal(result, &out))
+
+	assert.True(t, out.ResponseTruncated, "response should be truncated")
+	assert.NotEmpty(t, out.OmittedFiles, "some files should be omitted")
+	assert.NotEmpty(t, out.Files, "some files should be included")
+
+	// Total files accounted for should equal numFiles.
+	totalAccountedFor := len(out.Files) + len(out.OmittedFiles)
+	assert.Equal(t, numFiles, totalAccountedFor)
+}
+
+func TestPRFilesDiffRenamedFile(t *testing.T) {
+	// The files listing reports the new name; the section must be found under it.
+	rawDiff := "diff --git a/old.go b/new.go\n" +
+		"similarity index 90%\n" +
+		"rename from old.go\n" +
+		"rename to new.go\n" +
+		"--- a/old.go\n+++ b/new.go\n" +
+		"@@ -1 +1 @@\n-package old\n+package new\n"
+	filesJSON := `[{"filename":"new.go","status":"renamed","additions":1,"deletions":1}]`
+
+	srv := newPRFilesDiffServer(t, filesJSON, rawDiff)
+	defer srv.Close()
+
+	tool := tools.NewPRFilesDiff(gitea.NewClient(srv.URL, "tok"), allowlist.New([]string{"o"}))
+	result, err := tool.Call(context.Background(), json.RawMessage(`{"owner":"o","name":"r","number":1}`))
+	require.NoError(t, err)
+
+	var out struct {
+		Files []struct {
+			Path string `json:"path"`
+			Diff string `json:"diff"`
+		} `json:"files"`
+	}
+	require.NoError(t, json.Unmarshal(result, &out))
+
+	require.Len(t, out.Files, 1)
+	assert.Equal(t, "new.go", out.Files[0].Path)
+	assert.Contains(t, out.Files[0].Diff, "+package new")
+}
+
+func TestPRFilesDiffAllowlistRejects(t *testing.T) {
+	tool := tools.NewPRFilesDiff(gitea.NewClient("http://unused", ""), allowlist.New([]string{"allowed"}))
+	_, err := tool.Call(context.Background(), json.RawMessage(`{"owner":"evil","name":"r","number":1}`))
+	require.Error(t, err)
+}
+
+func TestPRFilesDiffRequiresValidNumber(t *testing.T) {
+	tool := tools.NewPRFilesDiff(gitea.NewClient("http://unused", ""), allowlist.New([]string{"o"}))
+	_, err := tool.Call(context.Background(), json.RawMessage(`{"owner":"o","name":"r","number":0}`))
+	require.Error(t, err)
+	assert.ErrorIs(t, err, gitea.ErrValidation)
+}
+
+func TestPRFilesDiffRejectsUnsafeRepoName(t *testing.T) {
+	tool := tools.NewPRFilesDiff(gitea.NewClient("http://unused", ""), allowlist.New([]string{"o"}))
+	for _, name := range []string{"..", "../other/repo", "a/b"} {
+		args := fmt.Sprintf(`{"owner":"o","name":%q,"number":1}`, name)
+		_, err := tool.Call(context.Background(), json.RawMessage(args))
+		require.Error(t, err, "name %q", name)
+		assert.ErrorIs(t, err, gitea.ErrValidation)
+	}
+}