190 lines
4.4 KiB
Go
190 lines
4.4 KiB
Go
package tools
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"sort"
|
|
"sync"
|
|
"time"
|
|
|
|
"gitea.d-ma.be/mathias/gitea-mcp/internal/allowlist"
|
|
"gitea.d-ma.be/mathias/gitea-mcp/internal/gitea"
|
|
"gitea.d-ma.be/mathias/gitea-mcp/internal/registry"
|
|
)
|
|
|
|
// semaphore is a counting semaphore backed by a buffered channel: each
// buffered element represents one held slot.
type semaphore chan struct{}

// newSem returns a semaphore with n slots.
func newSem(n int) semaphore {
	slots := make(chan struct{}, n)
	return semaphore(slots)
}

// acquire takes one slot, blocking while all n slots are held.
func (s semaphore) acquire() {
	var token struct{}
	s <- token
}

// release gives back one slot previously taken with acquire.
func (s semaphore) release() {
	<-s
}
|
|
|
|
type CodeSearch struct {
|
|
c *gitea.Client
|
|
a *allowlist.Allowlist
|
|
}
|
|
|
|
func NewCodeSearch(c *gitea.Client, a *allowlist.Allowlist) *CodeSearch {
|
|
return &CodeSearch{c: c, a: a}
|
|
}
|
|
|
|
func (t *CodeSearch) Descriptor() registry.ToolDescriptor {
|
|
return registry.ToolDescriptor{
|
|
Name: "code_search",
|
|
Description: "Search code across one repo or fan out across an owner's repos.",
|
|
InputSchema: json.RawMessage(`{
|
|
"type":"object",
|
|
"properties":{
|
|
"q":{"type":"string"},
|
|
"owner":{"type":"string"},
|
|
"repo":{"type":"string"},
|
|
"page":{"type":"integer","minimum":1},
|
|
"limit":{"type":"integer","minimum":1,"maximum":50}
|
|
},
|
|
"required":["q","owner"]
|
|
}`),
|
|
}
|
|
}
|
|
|
|
// codeSearchArgs is the decoded input of the code_search tool.
type codeSearchArgs struct {
	// Q is the search query; Call rejects an empty value.
	Q string `json:"q"`
	// Owner is the repository owner; Call checks it against the allowlist.
	Owner string `json:"owner"`
	// Repo selects a single repository. When empty, Call fans the search
	// out across the owner's repositories instead.
	Repo string `json:"repo"`
	// Page is the result page; Call replaces values below 1 with 1.
	Page int `json:"page"`
	// Limit is the requested number of results; Call normalizes it through
	// capLimit before use.
	Limit int `json:"limit"`
}
|
|
|
|
// codeSearchResult is one search hit as reported to the caller.
type codeSearchResult struct {
	// Repo is the repository the hit came from, written as "owner/name".
	Repo string `json:"repo"`
	// Path is the hit's file path as reported by the search backend.
	Path string `json:"path"`
	// Snippet is the matching excerpt as reported by the search backend.
	Snippet string `json:"snippet"`
	// Score ranks the hit. The producers in this file substitute 1.0 when
	// the backend reports a score of zero.
	Score float64 `json:"score"`
	// HTMLURL is the hit's URL as reported by the search backend.
	HTMLURL string `json:"html_url"`
}
|
|
|
|
func (t *CodeSearch) Call(ctx context.Context, raw json.RawMessage) (json.RawMessage, error) {
|
|
var args codeSearchArgs
|
|
if err := parseArgs(raw, &args); err != nil {
|
|
return nil, err
|
|
}
|
|
if args.Q == "" {
|
|
return nil, fmt.Errorf("q is required: %w", gitea.ErrValidation)
|
|
}
|
|
if err := t.a.Check(args.Owner); err != nil {
|
|
return nil, err
|
|
}
|
|
if args.Page < 1 {
|
|
args.Page = 1
|
|
}
|
|
args.Limit = capLimit(args.Limit, 30)
|
|
|
|
if args.Repo != "" {
|
|
return t.singleRepo(ctx, args)
|
|
}
|
|
return t.fanOut(ctx, args)
|
|
}
|
|
|
|
func (t *CodeSearch) singleRepo(ctx context.Context, args codeSearchArgs) (json.RawMessage, error) {
|
|
hits, err := t.c.SearchCode(ctx, args.Owner, args.Repo, args.Q, args.Page, args.Limit)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
results := make([]codeSearchResult, 0, len(hits))
|
|
repoFull := args.Owner + "/" + args.Repo
|
|
for _, h := range hits {
|
|
score := h.Score
|
|
if score == 0 {
|
|
score = 1.0
|
|
}
|
|
results = append(results, codeSearchResult{
|
|
Repo: repoFull,
|
|
Path: h.Path,
|
|
Snippet: h.Snippet,
|
|
Score: score,
|
|
HTMLURL: h.HTMLURL,
|
|
})
|
|
}
|
|
out := map[string]any{"results": results}
|
|
if len(hits) == args.Limit {
|
|
out["next_page"] = args.Page + 1
|
|
}
|
|
return textOK(out)
|
|
}
|
|
|
|
func (t *CodeSearch) fanOut(ctx context.Context, args codeSearchArgs) (json.RawMessage, error) {
|
|
repos, err := t.c.ListRepos(ctx, args.Owner, 1, 50)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
type repoResult struct {
|
|
repo string
|
|
hits []gitea.CodeSearchHit
|
|
err error
|
|
}
|
|
resultsCh := make(chan repoResult, len(repos))
|
|
sem := newSem(5)
|
|
var wg sync.WaitGroup
|
|
|
|
for _, r := range repos {
|
|
repo := r // capture
|
|
wg.Add(1)
|
|
go func() {
|
|
defer wg.Done()
|
|
sem.acquire()
|
|
defer sem.release()
|
|
|
|
rctx, cancel := context.WithTimeout(ctx, 5*time.Second)
|
|
defer cancel()
|
|
|
|
hits, err := t.c.SearchCode(rctx, args.Owner, repo.Name, args.Q, 1, args.Limit)
|
|
resultsCh <- repoResult{repo: args.Owner + "/" + repo.Name, hits: hits, err: err}
|
|
}()
|
|
}
|
|
wg.Wait()
|
|
close(resultsCh)
|
|
|
|
merged := make([]codeSearchResult, 0)
|
|
var partialRepos []string
|
|
for rr := range resultsCh {
|
|
if rr.err != nil {
|
|
partialRepos = append(partialRepos, rr.repo)
|
|
continue
|
|
}
|
|
for _, h := range rr.hits {
|
|
score := h.Score
|
|
if score == 0 {
|
|
score = 1.0
|
|
}
|
|
merged = append(merged, codeSearchResult{
|
|
Repo: rr.repo, Path: h.Path, Snippet: h.Snippet, Score: score, HTMLURL: h.HTMLURL,
|
|
})
|
|
}
|
|
}
|
|
|
|
// Sort by score desc, then by repo+path for determinism.
|
|
sort.Slice(merged, func(i, j int) bool {
|
|
if merged[i].Score != merged[j].Score {
|
|
return merged[i].Score > merged[j].Score
|
|
}
|
|
if merged[i].Repo != merged[j].Repo {
|
|
return merged[i].Repo < merged[j].Repo
|
|
}
|
|
return merged[i].Path < merged[j].Path
|
|
})
|
|
if len(merged) > args.Limit {
|
|
merged = merged[:args.Limit]
|
|
}
|
|
|
|
out := map[string]any{
|
|
"results": merged,
|
|
"partial": len(partialRepos) > 0,
|
|
}
|
|
if len(partialRepos) > 0 {
|
|
sort.Strings(partialRepos)
|
|
out["partial_repos"] = partialRepos
|
|
}
|
|
return textOK(out)
|
|
}
|