package tools

import (
	"bufio"
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"strings"
	"unicode/utf8"

	"gitea.d-ma.be/mathias/gitea-mcp/internal/allowlist"
	"gitea.d-ma.be/mathias/gitea-mcp/internal/gitea"
	"gitea.d-ma.be/mathias/gitea-mcp/internal/registry"
)

const (
	// maxFileDiffBytes caps the diff text returned for a single file.
	maxFileDiffBytes = 20 * 1024
	// maxResponseBytes caps the estimated combined size of all returned entries.
	maxResponseBytes = 200 * 1024
)

// PRFilesDiff is the "pr_files_diff" tool. It holds the Gitea client used to
// fetch pull request data and the allowlist consulted before any request.
type PRFilesDiff struct {
	c *gitea.Client
	a *allowlist.Allowlist
}

// NewPRFilesDiff returns a PRFilesDiff that uses client c and allowlist a.
func NewPRFilesDiff(c *gitea.Client, a *allowlist.Allowlist) *PRFilesDiff {
	return &PRFilesDiff{c: c, a: a}
}

// Descriptor returns the tool's name, description and JSON input schema.
func (t *PRFilesDiff) Descriptor() registry.ToolDescriptor {
	return registry.ToolDescriptor{
		Name:        "pr_files_diff",
		Description: "Get a pull request's per-file diff with size caps (20KB/file, 200KB total).",
		InputSchema: json.RawMessage(`{ "type":"object", "properties":{ "owner":{"type":"string"}, "name":{"type":"string"}, "number":{"type":"integer","minimum":1} }, "required":["owner","name","number"] }`),
	}
}

// prFilesDiffArgs is the decoded input of the pr_files_diff tool.
type prFilesDiffArgs struct {
	Owner  string `json:"owner"`
	Name   string `json:"name"`
	Number int    `json:"number"`
}

// prFileDiffEntry is one file in the tool's response.
type prFileDiffEntry struct {
	Path         string `json:"path"`
	Diff         string `json:"diff"`
	Truncated    bool   `json:"truncated"`
	OmittedLines int    `json:"omitted_lines,omitempty"`
	Additions    int    `json:"additions"`
	Deletions    int    `json:"deletions"`
}

// Call decodes raw into prFilesDiffArgs, runs the allowlist check on the owner,
// fetches the pull request's file list and raw diff, and returns one entry per
// file.
//
// Each file's diff is capped at maxFileDiffBytes. Once the running size
// estimate would exceed maxResponseBytes, further files are listed by name in
// omitted_files instead of being included, and response_truncated is set.
//
// It returns an error wrapping gitea.ErrValidation when number is below 1, and
// otherwise passes through errors from argument parsing, the allowlist check
// and the Gitea client.
func (t *PRFilesDiff) Call(ctx context.Context, raw json.RawMessage) (json.RawMessage, error) {
	var args prFilesDiffArgs
	if err := parseArgs(raw, &args); err != nil {
		return nil, err
	}
	if err := t.a.Check(args.Owner); err != nil {
		return nil, err
	}
	if args.Number < 1 {
		return nil, fmt.Errorf("number must be >= 1: %w", gitea.ErrValidation)
	}

	files, err := t.c.GetPullRequestFiles(ctx, args.Owner, args.Name, args.Number)
	if err != nil {
		return nil, err
	}
	rawDiff, err := t.c.GetPullRequestDiff(ctx, args.Owner, args.Name, args.Number)
	if err != nil {
		return nil, err
	}

	// Split unified diff by per-file headers ("diff --git a/path b/path").
	perFile := splitUnifiedDiff(rawDiff)

	out := struct {
		Files             []prFileDiffEntry `json:"files"`
		OmittedFiles      []string          `json:"omitted_files,omitempty"`
		ResponseTruncated bool              `json:"response_truncated"`
	}{
		Files: make([]prFileDiffEntry, 0, len(files)),
	}

	totalBytes := 0
	for _, f := range files {
		// Best-effort lookup by path: a file without a matching diff section
		// yields a nil slice and therefore an empty diff string.
		kept, omittedLines, truncated := truncateDiff(perFile[f.Filename], maxFileDiffBytes)
		entry := prFileDiffEntry{
			Path:         f.Filename,
			Diff:         string(kept),
			Truncated:    truncated,
			OmittedLines: omittedLines,
			Additions:    f.Additions,
			Deletions:    f.Deletions,
		}

		// Response cap: if adding this entry would exceed the budget, report
		// the file by name only.
		entryEstimate := len(entry.Diff) + 200 // small overhead for path + counts
		if totalBytes+entryEstimate > maxResponseBytes {
			out.OmittedFiles = append(out.OmittedFiles, f.Filename)
			out.ResponseTruncated = true
			continue
		}
		totalBytes += entryEstimate
		out.Files = append(out.Files, entry)
	}
	return textOK(out)
}

// truncateDiff caps diff at limit bytes. It returns the kept prefix, the number
// of newline characters in the dropped remainder, and whether anything was
// dropped.
//
// A plain byte cut can land inside a multi-byte UTF-8 sequence, which would
// leave invalid UTF-8 at the end of the string. The cut is therefore moved back
// by at most utf8.UTFMax-1 bytes to the start of that rune. If no rune start is
// found in that window the data is not valid UTF-8 at this point and the plain
// byte cut is used.
func truncateDiff(diff []byte, limit int) (kept []byte, omittedLines int, truncated bool) {
	if len(diff) <= limit {
		return diff, 0, false
	}
	cut := limit
	for cut > 0 && limit-cut < utf8.UTFMax-1 && !utf8.RuneStart(diff[cut]) {
		cut--
	}
	if !utf8.RuneStart(diff[cut]) {
		cut = limit
	}
	// The bytes skipped while backing up are continuation bytes, never '\n',
	// so the newline count is the same as for a cut exactly at limit.
	return diff[:cut], bytes.Count(diff[cut:], []byte("\n")), true
}

// splitUnifiedDiff parses a unified diff and returns a map from filename to that file's
// portion of the diff. The unified diff format starts each file with a line like
// "diff --git a/<path> b/<path>".
//
// Map keys are the old-side ("a/") path taken from the header line. When both
// sides of the header name the same path, that whole path is the key even if it
// contains " b/"; otherwise the key is the text before the first " b/". A header
// that yields no path causes that file's section to be skipped, as are any
// lines before the first header.
//
// Each value starts with the file's "diff --git" line, has every line
// terminated by '\n', and is an independent copy that does not share storage
// with the other values.
func splitUnifiedDiff(d []byte) map[string][]byte {
	sections := map[string][]byte{}
	scanner := bufio.NewScanner(bytes.NewReader(d))
	scanner.Buffer(make([]byte, 0, 64*1024), 16*1024*1024) // allow long diff lines

	var (
		currentFile string
		current     bytes.Buffer
	)
	flush := func() {
		if currentFile == "" {
			return
		}
		// Copy before Reset: Bytes aliases the buffer's storage, which the next
		// file's lines would otherwise overwrite.
		sections[currentFile] = append([]byte(nil), current.Bytes()...)
		current.Reset()
	}

	for scanner.Scan() {
		line := scanner.Text()
		if strings.HasPrefix(line, "diff --git ") {
			flush()

			// Parse: "diff --git a/<path> b/<path>".
			rest := strings.TrimPrefix(line, "diff --git a/")
			currentFile = ""
			half := (len(rest) - len(" b/")) / 2
			switch {
			case half > 0 && len(rest) == 2*half+len(" b/") &&
				rest[half:half+len(" b/")] == " b/" &&
				rest[:half] == rest[half+len(" b/"):]:
				// Same path on both sides: unambiguous even when the path
				// itself contains " b/".
				currentFile = rest[:half]
			default:
				if i := strings.Index(rest, " b/"); i >= 0 {
					currentFile = rest[:i]
				}
			}
		}
		if currentFile != "" {
			current.WriteString(line)
			current.WriteByte('\n')
		}
	}
	// The signature has no error result, so a scanner failure (for example a
	// line longer than the buffer limit) simply ends parsing; whatever was
	// collected up to that point is still returned.
	flush()
	return sections
}