Compare commits
24 Commits
1c3c9de550
...
v0.9.0
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6f1cb53295 | ||
|
|
37fdd33b2d | ||
|
|
078ec029da | ||
|
|
4af1036423 | ||
|
|
7a13c75655 | ||
|
|
57462b52ff | ||
|
|
a56a4db963 | ||
|
|
58c57412a9 | ||
|
|
ddd07ae7eb | ||
|
|
61b6247df9 | ||
|
|
75685e7b67 | ||
|
|
fe18e4ee77 | ||
|
|
937355cabe | ||
|
|
5950ef5f0f | ||
|
|
a220fcaf2b | ||
|
|
d1c8e3396f | ||
|
|
3b79311fdd | ||
|
|
7baf8d7e7a | ||
|
|
a8de04c7b6 | ||
|
|
87cf9d0afc | ||
|
|
46adaf2148 | ||
|
|
c11763472c | ||
|
|
189ff89c34 | ||
|
|
c7e0192486 |
@@ -36,9 +36,18 @@ These rules apply to every task across every project, regardless of harness.
|
|||||||
4. **Goal-driven execution.** Define clear success criteria up front for every task.
|
4. **Goal-driven execution.** Define clear success criteria up front for every task.
|
||||||
Loop — implement, verify, refine — until those criteria are met. Don't claim
|
Loop — implement, verify, refine — until those criteria are met. Don't claim
|
||||||
completion without evidence (tests pass, command output, observed behavior).
|
completion without evidence (tests pass, command output, observed behavior).
|
||||||
5. **Branch-per-task for multi-agent repos.** When another agent may be active on
|
5. **Trunk-Based Development — commit directly to main.** Every commit is one
|
||||||
the same repo, create a branch (`agent/<description>`), commit there, and open a
|
logical change (one tool, one fix, one test) with passing tests. Main is always
|
||||||
PR. Do not merge without explicit instruction from Mathias.
|
deployable. Never create long-lived feature branches.
|
||||||
|
|
||||||
|
**Exception — parallel agents on same repo:** If another agent is known to be
|
||||||
|
actively working on the same repo simultaneously, create a short-lived branch
|
||||||
|
(`agent/<description>`), finish the task, and merge to main within the same
|
||||||
|
session. Do not leave agent branches open between sessions.
|
||||||
|
|
||||||
|
**Exception — external contributor or client four-eyes requirement:** Use
|
||||||
|
PR flow only when a human reviewer outside the project is required. Document
|
||||||
|
the reason in PROJECT.md.
|
||||||
|
|
||||||
## Default stack
|
## Default stack
|
||||||
|
|
||||||
@@ -49,9 +58,10 @@ These rules apply to every task across every project, regardless of harness.
|
|||||||
| Build | Task (taskfile.dev) | Make | — |
|
| Build | Task (taskfile.dev) | Make | — |
|
||||||
| Containers | Docker Compose (dev), k3s (prod) | — | — |
|
| Containers | Docker Compose (dev), k3s (prod) | — | — |
|
||||||
| DB | PostgreSQL + sqlc | SQLite | — |
|
| DB | PostgreSQL + sqlc | SQLite | — |
|
||||||
| Search | Qdrant (vector), BM25 | — | — |
|
| Search | pgvector (vector), BM25 | Qdrant (when >1M vectors or hybrid retrieval) | — |
|
||||||
| Logging | slog (structured) | — | — |
|
| Logging | slog (structured) | — | — |
|
||||||
| Testing | Table-driven, testify | — | — |
|
| Testing | Table-driven, testify | — | — |
|
||||||
|
| Agents (Go) | google.golang.org/adk + pkg/litellm adapter | — | — |
|
||||||
|
|
||||||
Exploratory: Rust, Zig — I'll tell you when I want these.
|
Exploratory: Rust, Zig — I'll tell you when I want these.
|
||||||
|
|
||||||
@@ -61,9 +71,12 @@ Exploratory: Rust, Zig — I'll tell you when I want these.
|
|||||||
- **Errors**: `fmt.Errorf("operation: %w", err)` — never naked, never log-and-return
|
- **Errors**: `fmt.Errorf("operation: %w", err)` — never naked, never log-and-return
|
||||||
- **Naming**: stdlib conventions, no stuttering
|
- **Naming**: stdlib conventions, no stuttering
|
||||||
- **Architecture**: prefer stdlib over frameworks, constructor injection, env-var config parsed into typed structs
|
- **Architecture**: prefer stdlib over frameworks, constructor injection, env-var config parsed into typed structs
|
||||||
- **Git**: conventional commits (`feat:`, `fix:`, `chore:`), one concern per PR, PR describes *why* not *what*
|
- **Git**: conventional commits (`feat:`, `fix:`, `chore:`), commit directly to main,
|
||||||
|
one logical change per commit, CI is the quality gate
|
||||||
|
- **Never**: long-lived feature branches, PRs for solo work, direct push without
|
||||||
|
passing `task check` locally first
|
||||||
- **Security**: no secrets in code, govulncheck before adding deps, SOPS for encrypted config
|
- **Security**: no secrets in code, govulncheck before adding deps, SOPS for encrypted config
|
||||||
- **Dependencies**: prefer stdlib. testify, slog, templ, sqlc are pre-approved; anything else needs justification in the commit message
|
- **Dependencies**: prefer stdlib. testify, slog, templ, sqlc, google.golang.org/adk (agent projects only) are pre-approved; anything else needs justification in the commit message
|
||||||
|
|
||||||
## Infrastructure
|
## Infrastructure
|
||||||
|
|
||||||
@@ -71,7 +84,7 @@ Three machines on Tailscale:
|
|||||||
|
|
||||||
| Machine | Role | Key specs |
|
| Machine | Role | Key specs |
|
||||||
|---------|------|-----------|
|
|---------|------|-----------|
|
||||||
| koala | GPU inference, heavy compute | RTX 5070, runs llama-swap, Qdrant |
|
| koala | GPU inference, heavy compute | RTX 5070, runs k3s + llama-swap + shared postgres18/pgvector |
|
||||||
| iguana | Services, builds | M2 Ultra Mac |
|
| iguana | Services, builds | M2 Ultra Mac |
|
||||||
| flamingo | Daily driver, edge | Mac mini, ~/dev is here |
|
| flamingo | Daily driver, edge | Mac mini, ~/dev is here |
|
||||||
|
|
||||||
@@ -103,18 +116,64 @@ See `~/dev/PROJECT_SUMMARY.md` for detailed descriptions of each project.
|
|||||||
- **koala-ai-stack** (`AGENTS/`) — local AI server infrastructure management
|
- **koala-ai-stack** (`AGENTS/`) — local AI server infrastructure management
|
||||||
- **klimatkollen** (`XT/`) — Swedish municipal climate data platform
|
- **klimatkollen** (`XT/`) — Swedish municipal climate data platform
|
||||||
|
|
||||||
## Knowledge base
|
## Knowledge base — actively use it
|
||||||
|
|
||||||
When available, agents can query the shared knowledge base:
|
A persistent brain (BM25 search + LLM-synthesised Q&A) survives across sessions,
|
||||||
|
hosts, and harnesses. It holds 100+ hard-won entries: infra incident postmortems,
|
||||||
|
Go pitfalls, framework gotchas, design principles, ADRs. **It is not optional
|
||||||
|
reference material — query it actively, not just when explicitly told.**
|
||||||
|
|
||||||
- **MCP**: `mcp://hyperguild.<TAILNET>.ts.net:3100/knowledge`
|
### When to query (treat as a reflex)
|
||||||
- **HTTP**: `http://hyperguild.<TAILNET>.ts.net:3100/api/v1/search`
|
|
||||||
|
|
||||||
<!-- TODO: replace <TAILNET> placeholder with the real Tailscale tailnet
|
- **Before** starting a non-trivial task — search for prior art with the symptom
|
||||||
name once hyperguild is deployed. Until then, agents that try to
|
AND the system component ("how did we solve X in Y?"). 5 seconds beats 5 hours.
|
||||||
reach the knowledge service on a host where it isn't running will
|
- **When debugging** — search for the error string, the stack frame, the affected
|
||||||
get DNS NXDOMAIN, which is the desired fail-loudly behavior. -->
|
service. Past you may have already paid this tax.
|
||||||
- **Scoping**: defaults to `public` collection; client projects filter to `{client}` + `public`
|
- **Before adopting** a pattern, library, framework, or model name — check if it
|
||||||
|
was tried and rejected, or what the integration footguns are.
|
||||||
|
- **When making architectural decisions** — search for the domain + "ADR" or
|
||||||
|
"decision" to find prior reasoning before re-deriving it.
|
||||||
|
- **When a recommendation feels novel** — challenge yourself: "has this been
|
||||||
|
documented?" The brain often has it.
|
||||||
|
|
||||||
|
### When to write
|
||||||
|
|
||||||
|
After you discover something that **future-you would forget** and that **isn't
|
||||||
|
recoverable from the code, git log, or PR description alone**:
|
||||||
|
|
||||||
|
- Bugs whose root cause is non-obvious and generalisable beyond this project.
|
||||||
|
- Framework / library / model-name quirks that bit you and would bite anyone.
|
||||||
|
- Design principles validated under fire (e.g. "every `_get` needs a `_list`").
|
||||||
|
- Postmortems for incidents: what broke, why, how diagnosed, what to do next time.
|
||||||
|
|
||||||
|
DON'T write project status, sprint progress, PR summaries, or "what I did this
|
||||||
|
session" — those rot fast and the originals are in git/gitea anyway. Brain
|
||||||
|
entries that age well are about *why*, *how to avoid*, and *what to do when*.
|
||||||
|
|
||||||
|
### How to access (per harness)
|
||||||
|
|
||||||
|
| Harness | Query | Write |
|
||||||
|
|---------|-------|-------|
|
||||||
|
| **Claude Code, Claude Desktop** | `brain_query` (BM25), `brain_answer` (LLM-synth + sources) MCP tools | `brain_write` MCP tool |
|
||||||
|
| **Crush, Pi, Antigravity, other MCP-capable** | same MCP server: `ingestion-brain` (via the `mcp__*_brain__*` namespace once authenticated) | same |
|
||||||
|
| **Anything HTTP-only (curl, scripts)** | `POST https://brain-mcp.d-ma.be/query` with `{"query":"..."}` (auth via `BRAIN_MCP_TOKEN`) | `POST .../write` with `{"content":"...","filename":"..."}` |
|
||||||
|
| **Browser / human inspection** | `https://gitea.d-ma.be/mathias/hyperguild` → `knowledge/` and `wiki/` markdown files | — |
|
||||||
|
|
||||||
|
- **Scoping**: defaults to `public` collection; client projects filter to `{client}` + `public`.
|
||||||
|
- **Routing**: `brain_answer`'s LLM uses berget.ai as primary, iguana ollama as
|
||||||
|
fallback. Both are configurable in the `supervisor/ingestion-deployment.yaml`
|
||||||
|
on the koala k3s cluster; don't hardcode local-only model names into the
|
||||||
|
berget URL (see knowledge entry on namespace mismatches).
|
||||||
|
|
||||||
|
### Quick reflex checks
|
||||||
|
|
||||||
|
If you find yourself about to say any of these out loud, you owe yourself a brain query first:
|
||||||
|
|
||||||
|
- "I think the issue might be..."
|
||||||
|
- "Let me try X and see..."
|
||||||
|
- "I'll just write a script to..."
|
||||||
|
- "This is probably a new bug..."
|
||||||
|
- "Has anyone done this before?" — *yes, probably, go check.*
|
||||||
|
|
||||||
## Client work rules
|
## Client work rules
|
||||||
|
|
||||||
@@ -221,31 +280,28 @@ Key skills:
|
|||||||
|
|
||||||
## MCP endpoints
|
## MCP endpoints
|
||||||
|
|
||||||
Two MCP servers expose this project's tooling, both reachable over Tailscale:
|
Two MCP servers are live, both reachable over Tailscale and via HTTPS domain:
|
||||||
|
|
||||||
- **`brain`** at `http://koala:30330/mcp` — preferred path for `brain_query`,
|
- **`brain`** at `https://brain-mcp.d-ma.be/mcp` (NodePort `koala:30330`) —
|
||||||
`brain_write`, `brain_ingest`, `brain_ingest_raw`, and `session_log`. Hosted
|
`brain_query`, `brain_write`, `brain_ingest`, `brain_ingest_raw`,
|
||||||
by the ingestion service directly.
|
`brain_answer`, `brain_classify`, `session_log`. Hosted by the ingestion
|
||||||
- **`supervisor`** at `http://koala:30320/mcp` — skill workers (`tdd_red`,
|
service. Auth: Dex JWT (claude.ai OAuth) or static `BRAIN_MCP_TOKEN`.
|
||||||
`tdd_green`, `tdd_refactor`, `review`, `debug`, `spec`, `retrospective`,
|
|
||||||
`trainer`, `tier`). Will shrink as skill workers move to SKILL.md in a later
|
|
||||||
migration.
|
|
||||||
- **`routing`** at `http://koala:30310/mcp` — Mode 2 routing pod. Advertises
|
- **`routing`** at `http://koala:30310/mcp` — Mode 2 routing pod. Advertises
|
||||||
the same four cost-routable skills as the supervisor (`review`, `debug`,
|
`review`, `debug`, `retrospective`, `trainer`; routes each call to a local model
|
||||||
`retrospective`, `trainer`) but per-call decides whether to use a local
|
or Claude based on brain `/pass-rate`. Bearer auth via `ROUTING_MCP_TOKEN`
|
||||||
model or Claude based on the brain's `/pass-rate` response. Bearer auth
|
(opt-in). Only `mode client-local` registers this endpoint.
|
||||||
via `ROUTING_MCP_TOKEN` (opt-in). Only `mode client-local` registers this
|
|
||||||
endpoint; Mode 1 and Mode 3 do not.
|
The supervisor MCP (`koala:30320`) was retired in Plan 7 (2026-05-12). Its
|
||||||
|
skill workers (`tdd`, `spec`) are now SKILL.md files; routed skills moved to
|
||||||
|
the routing pod; brain tools moved to the brain MCP.
|
||||||
|
|
||||||
The brain HTTP REST API (`/query`, `/write`, `/ingest`, `/ingest-raw`,
|
The brain HTTP REST API (`/query`, `/write`, `/ingest`, `/ingest-raw`,
|
||||||
`/ingest-path`, `/backfill-refs`) remains available on the same port (3300) for
|
`/ingest-path`, `/backfill-refs`, `/pass-rate`) remains available on port 3300
|
||||||
shell scripts and non-MCP clients.
|
for shell scripts and non-MCP clients.
|
||||||
|
|
||||||
The brain HTTP REST API also serves a read-only `GET /pass-rate?skill=X&window=Y`
|
`brain_answer(query)` performs BM25 retrieval + LLM synthesis (berget.ai
|
||||||
endpoint that aggregates `final_status` counts from session logs and returns
|
gemma4:31b → iguana fallback). `brain_classify(text)` infers doc type, title,
|
||||||
`{skill, window, pass, fail, skip, total, pass_rate}`. Plan 6 (routing pod)
|
and tags. Both require `BRAIN_LLM_PRIMARY_URL` to be set in the ingestion pod.
|
||||||
reads this to decide whether to route skill calls to local models. Pass rate
|
|
||||||
is `null` when no logged invocations are in the window.
|
|
||||||
|
|
||||||
## Agent instructions
|
## Agent instructions
|
||||||
|
|
||||||
|
|||||||
@@ -47,31 +47,28 @@
|
|||||||
|
|
||||||
## MCP endpoints
|
## MCP endpoints
|
||||||
|
|
||||||
Two MCP servers expose this project's tooling, both reachable over Tailscale:
|
Two MCP servers are live, both reachable over Tailscale and via HTTPS domain:
|
||||||
|
|
||||||
- **`brain`** at `http://koala:30330/mcp` — preferred path for `brain_query`,
|
- **`brain`** at `https://brain-mcp.d-ma.be/mcp` (NodePort `koala:30330`) —
|
||||||
`brain_write`, `brain_ingest`, `brain_ingest_raw`, and `session_log`. Hosted
|
`brain_query`, `brain_write`, `brain_ingest`, `brain_ingest_raw`,
|
||||||
by the ingestion service directly.
|
`brain_answer`, `brain_classify`, `session_log`. Hosted by the ingestion
|
||||||
- **`supervisor`** at `http://koala:30320/mcp` — skill workers (`tdd_red`,
|
service. Auth: Dex JWT (claude.ai OAuth) or static `BRAIN_MCP_TOKEN`.
|
||||||
`tdd_green`, `tdd_refactor`, `review`, `debug`, `spec`, `retrospective`,
|
|
||||||
`trainer`, `tier`). Will shrink as skill workers move to SKILL.md in a later
|
|
||||||
migration.
|
|
||||||
- **`routing`** at `http://koala:30310/mcp` — Mode 2 routing pod. Advertises
|
- **`routing`** at `http://koala:30310/mcp` — Mode 2 routing pod. Advertises
|
||||||
the same four cost-routable skills as the supervisor (`review`, `debug`,
|
`review`, `debug`, `retrospective`, `trainer`; routes each call to a local model
|
||||||
`retrospective`, `trainer`) but per-call decides whether to use a local
|
or Claude based on brain `/pass-rate`. Bearer auth via `ROUTING_MCP_TOKEN`
|
||||||
model or Claude based on the brain's `/pass-rate` response. Bearer auth
|
(opt-in). Only `mode client-local` registers this endpoint.
|
||||||
via `ROUTING_MCP_TOKEN` (opt-in). Only `mode client-local` registers this
|
|
||||||
endpoint; Mode 1 and Mode 3 do not.
|
The supervisor MCP (`koala:30320`) was retired in Plan 7 (2026-05-12). Its
|
||||||
|
skill workers (`tdd`, `spec`) are now SKILL.md files; routed skills moved to
|
||||||
|
the routing pod; brain tools moved to the brain MCP.
|
||||||
|
|
||||||
The brain HTTP REST API (`/query`, `/write`, `/ingest`, `/ingest-raw`,
|
The brain HTTP REST API (`/query`, `/write`, `/ingest`, `/ingest-raw`,
|
||||||
`/ingest-path`, `/backfill-refs`) remains available on the same port (3300) for
|
`/ingest-path`, `/backfill-refs`, `/pass-rate`) remains available on port 3300
|
||||||
shell scripts and non-MCP clients.
|
for shell scripts and non-MCP clients.
|
||||||
|
|
||||||
The brain HTTP REST API also serves a read-only `GET /pass-rate?skill=X&window=Y`
|
`brain_answer(query)` performs BM25 retrieval + LLM synthesis (berget.ai
|
||||||
endpoint that aggregates `final_status` counts from session logs and returns
|
gemma4:31b → iguana fallback). `brain_classify(text)` infers doc type, title,
|
||||||
`{skill, window, pass, fail, skip, total, pass_rate}`. Plan 6 (routing pod)
|
and tags. Both require `BRAIN_LLM_PRIMARY_URL` to be set in the ingestion pod.
|
||||||
reads this to decide whether to route skill calls to local models. Pass rate
|
|
||||||
is `null` when no logged invocations are in the window.
|
|
||||||
|
|
||||||
## Agent instructions
|
## Agent instructions
|
||||||
|
|
||||||
|
|||||||
@@ -41,9 +41,18 @@ These rules apply to every task across every project, regardless of harness.
|
|||||||
4. **Goal-driven execution.** Define clear success criteria up front for every task.
|
4. **Goal-driven execution.** Define clear success criteria up front for every task.
|
||||||
Loop — implement, verify, refine — until those criteria are met. Don't claim
|
Loop — implement, verify, refine — until those criteria are met. Don't claim
|
||||||
completion without evidence (tests pass, command output, observed behavior).
|
completion without evidence (tests pass, command output, observed behavior).
|
||||||
5. **Branch-per-task for multi-agent repos.** When another agent may be active on
|
5. **Trunk-Based Development — commit directly to main.** Every commit is one
|
||||||
the same repo, create a branch (`agent/<description>`), commit there, and open a
|
logical change (one tool, one fix, one test) with passing tests. Main is always
|
||||||
PR. Do not merge without explicit instruction from Mathias.
|
deployable. Never create long-lived feature branches.
|
||||||
|
|
||||||
|
**Exception — parallel agents on same repo:** If another agent is known to be
|
||||||
|
actively working on the same repo simultaneously, create a short-lived branch
|
||||||
|
(`agent/<description>`), finish the task, and merge to main within the same
|
||||||
|
session. Do not leave agent branches open between sessions.
|
||||||
|
|
||||||
|
**Exception — external contributor or client four-eyes requirement:** Use
|
||||||
|
PR flow only when a human reviewer outside the project is required. Document
|
||||||
|
the reason in PROJECT.md.
|
||||||
|
|
||||||
## Default stack
|
## Default stack
|
||||||
|
|
||||||
@@ -54,9 +63,10 @@ These rules apply to every task across every project, regardless of harness.
|
|||||||
| Build | Task (taskfile.dev) | Make | — |
|
| Build | Task (taskfile.dev) | Make | — |
|
||||||
| Containers | Docker Compose (dev), k3s (prod) | — | — |
|
| Containers | Docker Compose (dev), k3s (prod) | — | — |
|
||||||
| DB | PostgreSQL + sqlc | SQLite | — |
|
| DB | PostgreSQL + sqlc | SQLite | — |
|
||||||
| Search | Qdrant (vector), BM25 | — | — |
|
| Search | pgvector (vector), BM25 | Qdrant (when >1M vectors or hybrid retrieval) | — |
|
||||||
| Logging | slog (structured) | — | — |
|
| Logging | slog (structured) | — | — |
|
||||||
| Testing | Table-driven, testify | — | — |
|
| Testing | Table-driven, testify | — | — |
|
||||||
|
| Agents (Go) | google.golang.org/adk + pkg/litellm adapter | — | — |
|
||||||
|
|
||||||
Exploratory: Rust, Zig — I'll tell you when I want these.
|
Exploratory: Rust, Zig — I'll tell you when I want these.
|
||||||
|
|
||||||
@@ -66,9 +76,12 @@ Exploratory: Rust, Zig — I'll tell you when I want these.
|
|||||||
- **Errors**: `fmt.Errorf("operation: %w", err)` — never naked, never log-and-return
|
- **Errors**: `fmt.Errorf("operation: %w", err)` — never naked, never log-and-return
|
||||||
- **Naming**: stdlib conventions, no stuttering
|
- **Naming**: stdlib conventions, no stuttering
|
||||||
- **Architecture**: prefer stdlib over frameworks, constructor injection, env-var config parsed into typed structs
|
- **Architecture**: prefer stdlib over frameworks, constructor injection, env-var config parsed into typed structs
|
||||||
- **Git**: conventional commits (`feat:`, `fix:`, `chore:`), one concern per PR, PR describes *why* not *what*
|
- **Git**: conventional commits (`feat:`, `fix:`, `chore:`), commit directly to main,
|
||||||
|
one logical change per commit, CI is the quality gate
|
||||||
|
- **Never**: long-lived feature branches, PRs for solo work, direct push without
|
||||||
|
passing `task check` locally first
|
||||||
- **Security**: no secrets in code, govulncheck before adding deps, SOPS for encrypted config
|
- **Security**: no secrets in code, govulncheck before adding deps, SOPS for encrypted config
|
||||||
- **Dependencies**: prefer stdlib. testify, slog, templ, sqlc are pre-approved; anything else needs justification in the commit message
|
- **Dependencies**: prefer stdlib. testify, slog, templ, sqlc, google.golang.org/adk (agent projects only) are pre-approved; anything else needs justification in the commit message
|
||||||
|
|
||||||
## Infrastructure
|
## Infrastructure
|
||||||
|
|
||||||
@@ -76,7 +89,7 @@ Three machines on Tailscale:
|
|||||||
|
|
||||||
| Machine | Role | Key specs |
|
| Machine | Role | Key specs |
|
||||||
|---------|------|-----------|
|
|---------|------|-----------|
|
||||||
| koala | GPU inference, heavy compute | RTX 5070, runs llama-swap, Qdrant |
|
| koala | GPU inference, heavy compute | RTX 5070, runs k3s + llama-swap + shared postgres18/pgvector |
|
||||||
| iguana | Services, builds | M2 Ultra Mac |
|
| iguana | Services, builds | M2 Ultra Mac |
|
||||||
| flamingo | Daily driver, edge | Mac mini, ~/dev is here |
|
| flamingo | Daily driver, edge | Mac mini, ~/dev is here |
|
||||||
|
|
||||||
@@ -108,18 +121,64 @@ See `~/dev/PROJECT_SUMMARY.md` for detailed descriptions of each project.
|
|||||||
- **koala-ai-stack** (`AGENTS/`) — local AI server infrastructure management
|
- **koala-ai-stack** (`AGENTS/`) — local AI server infrastructure management
|
||||||
- **klimatkollen** (`XT/`) — Swedish municipal climate data platform
|
- **klimatkollen** (`XT/`) — Swedish municipal climate data platform
|
||||||
|
|
||||||
## Knowledge base
|
## Knowledge base — actively use it
|
||||||
|
|
||||||
When available, agents can query the shared knowledge base:
|
A persistent brain (BM25 search + LLM-synthesised Q&A) survives across sessions,
|
||||||
|
hosts, and harnesses. It holds 100+ hard-won entries: infra incident postmortems,
|
||||||
|
Go pitfalls, framework gotchas, design principles, ADRs. **It is not optional
|
||||||
|
reference material — query it actively, not just when explicitly told.**
|
||||||
|
|
||||||
- **MCP**: `mcp://hyperguild.<TAILNET>.ts.net:3100/knowledge`
|
### When to query (treat as a reflex)
|
||||||
- **HTTP**: `http://hyperguild.<TAILNET>.ts.net:3100/api/v1/search`
|
|
||||||
|
|
||||||
<!-- TODO: replace <TAILNET> placeholder with the real Tailscale tailnet
|
- **Before** starting a non-trivial task — search for prior art with the symptom
|
||||||
name once hyperguild is deployed. Until then, agents that try to
|
AND the system component ("how did we solve X in Y?"). 5 seconds beats 5 hours.
|
||||||
reach the knowledge service on a host where it isn't running will
|
- **When debugging** — search for the error string, the stack frame, the affected
|
||||||
get DNS NXDOMAIN, which is the desired fail-loudly behavior. -->
|
service. Past you may have already paid this tax.
|
||||||
- **Scoping**: defaults to `public` collection; client projects filter to `{client}` + `public`
|
- **Before adopting** a pattern, library, framework, or model name — check if it
|
||||||
|
was tried and rejected, or what the integration footguns are.
|
||||||
|
- **When making architectural decisions** — search for the domain + "ADR" or
|
||||||
|
"decision" to find prior reasoning before re-deriving it.
|
||||||
|
- **When a recommendation feels novel** — challenge yourself: "has this been
|
||||||
|
documented?" The brain often has it.
|
||||||
|
|
||||||
|
### When to write
|
||||||
|
|
||||||
|
After you discover something that **future-you would forget** and that **isn't
|
||||||
|
recoverable from the code, git log, or PR description alone**:
|
||||||
|
|
||||||
|
- Bugs whose root cause is non-obvious and generalisable beyond this project.
|
||||||
|
- Framework / library / model-name quirks that bit you and would bite anyone.
|
||||||
|
- Design principles validated under fire (e.g. "every `_get` needs a `_list`").
|
||||||
|
- Postmortems for incidents: what broke, why, how diagnosed, what to do next time.
|
||||||
|
|
||||||
|
DON'T write project status, sprint progress, PR summaries, or "what I did this
|
||||||
|
session" — those rot fast and the originals are in git/gitea anyway. Brain
|
||||||
|
entries that age well are about *why*, *how to avoid*, and *what to do when*.
|
||||||
|
|
||||||
|
### How to access (per harness)
|
||||||
|
|
||||||
|
| Harness | Query | Write |
|
||||||
|
|---------|-------|-------|
|
||||||
|
| **Claude Code, Claude Desktop** | `brain_query` (BM25), `brain_answer` (LLM-synth + sources) MCP tools | `brain_write` MCP tool |
|
||||||
|
| **Crush, Pi, Antigravity, other MCP-capable** | same MCP server: `ingestion-brain` (via the `mcp__*_brain__*` namespace once authenticated) | same |
|
||||||
|
| **Anything HTTP-only (curl, scripts)** | `POST https://brain-mcp.d-ma.be/query` with `{"query":"..."}` (auth via `BRAIN_MCP_TOKEN`) | `POST .../write` with `{"content":"...","filename":"..."}` |
|
||||||
|
| **Browser / human inspection** | `https://gitea.d-ma.be/mathias/hyperguild` → `knowledge/` and `wiki/` markdown files | — |
|
||||||
|
|
||||||
|
- **Scoping**: defaults to `public` collection; client projects filter to `{client}` + `public`.
|
||||||
|
- **Routing**: `brain_answer`'s LLM uses berget.ai as primary, iguana ollama as
|
||||||
|
fallback. Both are configurable in the `supervisor/ingestion-deployment.yaml`
|
||||||
|
on the koala k3s cluster; don't hardcode local-only model names into the
|
||||||
|
berget URL (see knowledge entry on namespace mismatches).
|
||||||
|
|
||||||
|
### Quick reflex checks
|
||||||
|
|
||||||
|
If you find yourself about to say any of these out loud, you owe yourself a brain query first:
|
||||||
|
|
||||||
|
- "I think the issue might be..."
|
||||||
|
- "Let me try X and see..."
|
||||||
|
- "I'll just write a script to..."
|
||||||
|
- "This is probably a new bug..."
|
||||||
|
- "Has anyone done this before?" — *yes, probably, go check.*
|
||||||
|
|
||||||
## Client work rules
|
## Client work rules
|
||||||
|
|
||||||
@@ -226,31 +285,28 @@ Key skills:
|
|||||||
|
|
||||||
## MCP endpoints
|
## MCP endpoints
|
||||||
|
|
||||||
Two MCP servers expose this project's tooling, both reachable over Tailscale:
|
Two MCP servers are live, both reachable over Tailscale and via HTTPS domain:
|
||||||
|
|
||||||
- **`brain`** at `http://koala:30330/mcp` — preferred path for `brain_query`,
|
- **`brain`** at `https://brain-mcp.d-ma.be/mcp` (NodePort `koala:30330`) —
|
||||||
`brain_write`, `brain_ingest`, `brain_ingest_raw`, and `session_log`. Hosted
|
`brain_query`, `brain_write`, `brain_ingest`, `brain_ingest_raw`,
|
||||||
by the ingestion service directly.
|
`brain_answer`, `brain_classify`, `session_log`. Hosted by the ingestion
|
||||||
- **`supervisor`** at `http://koala:30320/mcp` — skill workers (`tdd_red`,
|
service. Auth: Dex JWT (claude.ai OAuth) or static `BRAIN_MCP_TOKEN`.
|
||||||
`tdd_green`, `tdd_refactor`, `review`, `debug`, `spec`, `retrospective`,
|
|
||||||
`trainer`, `tier`). Will shrink as skill workers move to SKILL.md in a later
|
|
||||||
migration.
|
|
||||||
- **`routing`** at `http://koala:30310/mcp` — Mode 2 routing pod. Advertises
|
- **`routing`** at `http://koala:30310/mcp` — Mode 2 routing pod. Advertises
|
||||||
the same four cost-routable skills as the supervisor (`review`, `debug`,
|
`review`, `debug`, `retrospective`, `trainer`; routes each call to a local model
|
||||||
`retrospective`, `trainer`) but per-call decides whether to use a local
|
or Claude based on brain `/pass-rate`. Bearer auth via `ROUTING_MCP_TOKEN`
|
||||||
model or Claude based on the brain's `/pass-rate` response. Bearer auth
|
(opt-in). Only `mode client-local` registers this endpoint.
|
||||||
via `ROUTING_MCP_TOKEN` (opt-in). Only `mode client-local` registers this
|
|
||||||
endpoint; Mode 1 and Mode 3 do not.
|
The supervisor MCP (`koala:30320`) was retired in Plan 7 (2026-05-12). Its
|
||||||
|
skill workers (`tdd`, `spec`) are now SKILL.md files; routed skills moved to
|
||||||
|
the routing pod; brain tools moved to the brain MCP.
|
||||||
|
|
||||||
The brain HTTP REST API (`/query`, `/write`, `/ingest`, `/ingest-raw`,
|
The brain HTTP REST API (`/query`, `/write`, `/ingest`, `/ingest-raw`,
|
||||||
`/ingest-path`, `/backfill-refs`) remains available on the same port (3300) for
|
`/ingest-path`, `/backfill-refs`, `/pass-rate`) remains available on port 3300
|
||||||
shell scripts and non-MCP clients.
|
for shell scripts and non-MCP clients.
|
||||||
|
|
||||||
The brain HTTP REST API also serves a read-only `GET /pass-rate?skill=X&window=Y`
|
`brain_answer(query)` performs BM25 retrieval + LLM synthesis (berget.ai
|
||||||
endpoint that aggregates `final_status` counts from session logs and returns
|
gemma4:31b → iguana fallback). `brain_classify(text)` infers doc type, title,
|
||||||
`{skill, window, pass, fail, skip, total, pass_rate}`. Plan 6 (routing pod)
|
and tags. Both require `BRAIN_LLM_PRIMARY_URL` to be set in the ingestion pod.
|
||||||
reads this to decide whether to route skill calls to local models. Pass rate
|
|
||||||
is `null` when no logged invocations are in the window.
|
|
||||||
|
|
||||||
## Agent instructions
|
## Agent instructions
|
||||||
|
|
||||||
|
|||||||
128
.cursorrules
128
.cursorrules
@@ -39,9 +39,18 @@ These rules apply to every task across every project, regardless of harness.
|
|||||||
4. **Goal-driven execution.** Define clear success criteria up front for every task.
|
4. **Goal-driven execution.** Define clear success criteria up front for every task.
|
||||||
Loop — implement, verify, refine — until those criteria are met. Don't claim
|
Loop — implement, verify, refine — until those criteria are met. Don't claim
|
||||||
completion without evidence (tests pass, command output, observed behavior).
|
completion without evidence (tests pass, command output, observed behavior).
|
||||||
5. **Branch-per-task for multi-agent repos.** When another agent may be active on
|
5. **Trunk-Based Development — commit directly to main.** Every commit is one
|
||||||
the same repo, create a branch (`agent/<description>`), commit there, and open a
|
logical change (one tool, one fix, one test) with passing tests. Main is always
|
||||||
PR. Do not merge without explicit instruction from Mathias.
|
deployable. Never create long-lived feature branches.
|
||||||
|
|
||||||
|
**Exception — parallel agents on same repo:** If another agent is known to be
|
||||||
|
actively working on the same repo simultaneously, create a short-lived branch
|
||||||
|
(`agent/<description>`), finish the task, and merge to main within the same
|
||||||
|
session. Do not leave agent branches open between sessions.
|
||||||
|
|
||||||
|
**Exception — external contributor or client four-eyes requirement:** Use
|
||||||
|
PR flow only when a human reviewer outside the project is required. Document
|
||||||
|
the reason in PROJECT.md.
|
||||||
|
|
||||||
## Default stack
|
## Default stack
|
||||||
|
|
||||||
@@ -52,9 +61,10 @@ These rules apply to every task across every project, regardless of harness.
|
|||||||
| Build | Task (taskfile.dev) | Make | — |
|
| Build | Task (taskfile.dev) | Make | — |
|
||||||
| Containers | Docker Compose (dev), k3s (prod) | — | — |
|
| Containers | Docker Compose (dev), k3s (prod) | — | — |
|
||||||
| DB | PostgreSQL + sqlc | SQLite | — |
|
| DB | PostgreSQL + sqlc | SQLite | — |
|
||||||
| Search | Qdrant (vector), BM25 | — | — |
|
| Search | pgvector (vector), BM25 | Qdrant (when >1M vectors or hybrid retrieval) | — |
|
||||||
| Logging | slog (structured) | — | — |
|
| Logging | slog (structured) | — | — |
|
||||||
| Testing | Table-driven, testify | — | — |
|
| Testing | Table-driven, testify | — | — |
|
||||||
|
| Agents (Go) | google.golang.org/adk + pkg/litellm adapter | — | — |
|
||||||
|
|
||||||
Exploratory: Rust, Zig — I'll tell you when I want these.
|
Exploratory: Rust, Zig — I'll tell you when I want these.
|
||||||
|
|
||||||
@@ -64,9 +74,12 @@ Exploratory: Rust, Zig — I'll tell you when I want these.
|
|||||||
- **Errors**: `fmt.Errorf("operation: %w", err)` — never naked, never log-and-return
|
- **Errors**: `fmt.Errorf("operation: %w", err)` — never naked, never log-and-return
|
||||||
- **Naming**: stdlib conventions, no stuttering
|
- **Naming**: stdlib conventions, no stuttering
|
||||||
- **Architecture**: prefer stdlib over frameworks, constructor injection, env-var config parsed into typed structs
|
- **Architecture**: prefer stdlib over frameworks, constructor injection, env-var config parsed into typed structs
|
||||||
- **Git**: conventional commits (`feat:`, `fix:`, `chore:`), one concern per PR, PR describes *why* not *what*
|
- **Git**: conventional commits (`feat:`, `fix:`, `chore:`), commit directly to main,
|
||||||
|
one logical change per commit, CI is the quality gate
|
||||||
|
- **Never**: long-lived feature branches, PRs for solo work, direct push without
|
||||||
|
passing `task check` locally first
|
||||||
- **Security**: no secrets in code, govulncheck before adding deps, SOPS for encrypted config
|
- **Security**: no secrets in code, govulncheck before adding deps, SOPS for encrypted config
|
||||||
- **Dependencies**: prefer stdlib. testify, slog, templ, sqlc are pre-approved; anything else needs justification in the commit message
|
- **Dependencies**: prefer stdlib. testify, slog, templ, sqlc, google.golang.org/adk (agent projects only) are pre-approved; anything else needs justification in the commit message
|
||||||
|
|
||||||
## Infrastructure
|
## Infrastructure
|
||||||
|
|
||||||
@@ -74,7 +87,7 @@ Three machines on Tailscale:
|
|||||||
|
|
||||||
| Machine | Role | Key specs |
|
| Machine | Role | Key specs |
|
||||||
|---------|------|-----------|
|
|---------|------|-----------|
|
||||||
| koala | GPU inference, heavy compute | RTX 5070, runs llama-swap, Qdrant |
|
| koala | GPU inference, heavy compute | RTX 5070, runs k3s + llama-swap + shared postgres18/pgvector |
|
||||||
| iguana | Services, builds | M2 Ultra Mac |
|
| iguana | Services, builds | M2 Ultra Mac |
|
||||||
| flamingo | Daily driver, edge | Mac mini, ~/dev is here |
|
| flamingo | Daily driver, edge | Mac mini, ~/dev is here |
|
||||||
|
|
||||||
@@ -106,18 +119,64 @@ See `~/dev/PROJECT_SUMMARY.md` for detailed descriptions of each project.
|
|||||||
- **koala-ai-stack** (`AGENTS/`) — local AI server infrastructure management
|
- **koala-ai-stack** (`AGENTS/`) — local AI server infrastructure management
|
||||||
- **klimatkollen** (`XT/`) — Swedish municipal climate data platform
|
- **klimatkollen** (`XT/`) — Swedish municipal climate data platform
|
||||||
|
|
||||||
## Knowledge base
|
## Knowledge base — actively use it
|
||||||
|
|
||||||
When available, agents can query the shared knowledge base:
|
A persistent brain (BM25 search + LLM-synthesised Q&A) survives across sessions,
|
||||||
|
hosts, and harnesses. It holds 100+ hard-won entries: infra incident postmortems,
|
||||||
|
Go pitfalls, framework gotchas, design principles, ADRs. **It is not optional
|
||||||
|
reference material — query it actively, not just when explicitly told.**
|
||||||
|
|
||||||
- **MCP**: `mcp://hyperguild.<TAILNET>.ts.net:3100/knowledge`
|
### When to query (treat as a reflex)
|
||||||
- **HTTP**: `http://hyperguild.<TAILNET>.ts.net:3100/api/v1/search`
|
|
||||||
|
|
||||||
<!-- TODO: replace <TAILNET> placeholder with the real Tailscale tailnet
|
- **Before** starting a non-trivial task — search for prior art with the symptom
|
||||||
name once hyperguild is deployed. Until then, agents that try to
|
AND the system component ("how did we solve X in Y?"). 5 seconds beats 5 hours.
|
||||||
reach the knowledge service on a host where it isn't running will
|
- **When debugging** — search for the error string, the stack frame, the affected
|
||||||
get DNS NXDOMAIN, which is the desired fail-loudly behavior. -->
|
service. Past you may have already paid this tax.
|
||||||
- **Scoping**: defaults to `public` collection; client projects filter to `{client}` + `public`
|
- **Before adopting** a pattern, library, framework, or model name — check if it
|
||||||
|
was tried and rejected, or what the integration footguns are.
|
||||||
|
- **When making architectural decisions** — search for the domain + "ADR" or
|
||||||
|
"decision" to find prior reasoning before re-deriving it.
|
||||||
|
- **When a recommendation feels novel** — challenge yourself: "has this been
|
||||||
|
documented?" The brain often has it.
|
||||||
|
|
||||||
|
### When to write
|
||||||
|
|
||||||
|
After you discover something that **future-you would forget** and that **isn't
|
||||||
|
recoverable from the code, git log, or PR description alone**:
|
||||||
|
|
||||||
|
- Bugs whose root cause is non-obvious and generalisable beyond this project.
|
||||||
|
- Framework / library / model-name quirks that bit you and would bite anyone.
|
||||||
|
- Design principles validated under fire (e.g. "every `_get` needs a `_list`").
|
||||||
|
- Postmortems for incidents: what broke, why, how diagnosed, what to do next time.
|
||||||
|
|
||||||
|
DON'T write project status, sprint progress, PR summaries, or "what I did this
|
||||||
|
session" — those rot fast and the originals are in git/gitea anyway. Brain
|
||||||
|
entries that age well are about *why*, *how to avoid*, and *what to do when*.
|
||||||
|
|
||||||
|
### How to access (per harness)
|
||||||
|
|
||||||
|
| Harness | Query | Write |
|
||||||
|
|---------|-------|-------|
|
||||||
|
| **Claude Code, Claude Desktop** | `brain_query` (BM25), `brain_answer` (LLM-synth + sources) MCP tools | `brain_write` MCP tool |
|
||||||
|
| **Crush, Pi, Antigravity, other MCP-capable** | same MCP server: `ingestion-brain` (via the `mcp__*_brain__*` namespace once authenticated) | same |
|
||||||
|
| **Anything HTTP-only (curl, scripts)** | `POST https://brain-mcp.d-ma.be/query` with `{"query":"..."}` (auth via `BRAIN_MCP_TOKEN`) | `POST .../write` with `{"content":"...","filename":"..."}` |
|
||||||
|
| **Browser / human inspection** | `https://gitea.d-ma.be/mathias/hyperguild` → `knowledge/` and `wiki/` markdown files | — |
|
||||||
|
|
||||||
|
- **Scoping**: defaults to `public` collection; client projects filter to `{client}` + `public`.
|
||||||
|
- **Routing**: `brain_answer`'s LLM uses berget.ai as primary, iguana ollama as
|
||||||
|
fallback. Both are configurable in the `supervisor/ingestion-deployment.yaml`
|
||||||
|
on the koala k3s cluster; don't hardcode local-only model names into the
|
||||||
|
berget URL (see knowledge entry on namespace mismatches).
|
||||||
|
|
||||||
|
### Quick reflex checks
|
||||||
|
|
||||||
|
If you find yourself about to say any of these out loud, you owe yourself a brain query first:
|
||||||
|
|
||||||
|
- "I think the issue might be..."
|
||||||
|
- "Let me try X and see..."
|
||||||
|
- "I'll just write a script to..."
|
||||||
|
- "This is probably a new bug..."
|
||||||
|
- "Has anyone done this before?" — *yes, probably, go check.*
|
||||||
|
|
||||||
## Client work rules
|
## Client work rules
|
||||||
|
|
||||||
@@ -224,31 +283,28 @@ Key skills:
|
|||||||
|
|
||||||
## MCP endpoints
|
## MCP endpoints
|
||||||
|
|
||||||
Two MCP servers expose this project's tooling, both reachable over Tailscale:
|
Two MCP servers are live, both reachable over Tailscale and via HTTPS domain:
|
||||||
|
|
||||||
- **`brain`** at `http://koala:30330/mcp` — preferred path for `brain_query`,
|
- **`brain`** at `https://brain-mcp.d-ma.be/mcp` (NodePort `koala:30330`) —
|
||||||
`brain_write`, `brain_ingest`, `brain_ingest_raw`, and `session_log`. Hosted
|
`brain_query`, `brain_write`, `brain_ingest`, `brain_ingest_raw`,
|
||||||
by the ingestion service directly.
|
`brain_answer`, `brain_classify`, `session_log`. Hosted by the ingestion
|
||||||
- **`supervisor`** at `http://koala:30320/mcp` — skill workers (`tdd_red`,
|
service. Auth: Dex JWT (claude.ai OAuth) or static `BRAIN_MCP_TOKEN`.
|
||||||
`tdd_green`, `tdd_refactor`, `review`, `debug`, `spec`, `retrospective`,
|
|
||||||
`trainer`, `tier`). Will shrink as skill workers move to SKILL.md in a later
|
|
||||||
migration.
|
|
||||||
- **`routing`** at `http://koala:30310/mcp` — Mode 2 routing pod. Advertises
|
- **`routing`** at `http://koala:30310/mcp` — Mode 2 routing pod. Advertises
|
||||||
the same four cost-routable skills as the supervisor (`review`, `debug`,
|
`review`, `debug`, `retrospective`, `trainer`; per-call routes to local model
|
||||||
`retrospective`, `trainer`) but per-call decides whether to use a local
|
or Claude based on brain `/pass-rate`. Bearer auth via `ROUTING_MCP_TOKEN`
|
||||||
model or Claude based on the brain's `/pass-rate` response. Bearer auth
|
(opt-in). Only `mode client-local` registers this endpoint.
|
||||||
via `ROUTING_MCP_TOKEN` (opt-in). Only `mode client-local` registers this
|
|
||||||
endpoint; Mode 1 and Mode 3 do not.
|
The supervisor MCP (`koala:30320`) was retired in Plan 7 (2026-05-12). Its
|
||||||
|
skill workers (`tdd`, `spec`) are now SKILL.md files; routed skills moved to
|
||||||
|
the routing pod; brain tools moved to the brain MCP.
|
||||||
|
|
||||||
The brain HTTP REST API (`/query`, `/write`, `/ingest`, `/ingest-raw`,
|
The brain HTTP REST API (`/query`, `/write`, `/ingest`, `/ingest-raw`,
|
||||||
`/ingest-path`, `/backfill-refs`) remains available on the same port (3300) for
|
`/ingest-path`, `/backfill-refs`, `/pass-rate`) remains available on port 3300
|
||||||
shell scripts and non-MCP clients.
|
for shell scripts and non-MCP clients.
|
||||||
|
|
||||||
The brain HTTP REST API also serves a read-only `GET /pass-rate?skill=X&window=Y`
|
`brain_answer(query)` performs BM25 retrieval + LLM synthesis (berget.ai
|
||||||
endpoint that aggregates `final_status` counts from session logs and returns
|
gemma4:31b → iguana fallback). `brain_classify(text)` infers doc type, title,
|
||||||
`{skill, window, pass, fail, skip, total, pass_rate}`. Plan 6 (routing pod)
|
and tags. Both require `BRAIN_LLM_PRIMARY_URL` to be set in the ingestion pod.
|
||||||
reads this to decide whether to route skill calls to local models. Pass rate
|
|
||||||
is `null` when no logged invocations are in the window.
|
|
||||||
|
|
||||||
## Agent instructions
|
## Agent instructions
|
||||||
|
|
||||||
|
|||||||
@@ -13,8 +13,6 @@ jobs:
|
|||||||
if: ${{ github.event.workflow_run.conclusion == 'success' && github.event.workflow_run.event == 'push' }}
|
if: ${{ github.event.workflow_run.conclusion == 'success' && github.event.workflow_run.event == 'push' }}
|
||||||
environment: staging
|
environment: staging
|
||||||
env:
|
env:
|
||||||
SERVICE: supervisor
|
|
||||||
IMAGE: gitea.d-ma.be/mathias/supervisor
|
|
||||||
INGESTION_IMAGE: gitea.d-ma.be/mathias/ingestion
|
INGESTION_IMAGE: gitea.d-ma.be/mathias/ingestion
|
||||||
ROUTING_IMAGE: gitea.d-ma.be/mathias/routing
|
ROUTING_IMAGE: gitea.d-ma.be/mathias/routing
|
||||||
INFRA_REPO: git@gitea.d-ma.be:mathias/infra.git
|
INFRA_REPO: git@gitea.d-ma.be:mathias/infra.git
|
||||||
@@ -23,27 +21,6 @@ jobs:
|
|||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
- name: Build and push supervisor image
|
|
||||||
run: |
|
|
||||||
set -e
|
|
||||||
trap 'rm -f /tmp/supervisor-image.tar' EXIT
|
|
||||||
IMAGE_TAG="${{ github.sha }}"
|
|
||||||
echo "Building ${IMAGE}:${IMAGE_TAG}"
|
|
||||||
|
|
||||||
buildctl --addr "${BUILDKIT_HOST}" build \
|
|
||||||
--frontend dockerfile.v0 \
|
|
||||||
--local context=. \
|
|
||||||
--local dockerfile=. \
|
|
||||||
--opt build-arg:VERSION="${IMAGE_TAG}" \
|
|
||||||
--output type=oci,dest=/tmp/supervisor-image.tar
|
|
||||||
|
|
||||||
skopeo copy \
|
|
||||||
oci-archive:/tmp/supervisor-image.tar \
|
|
||||||
docker://${IMAGE}:${IMAGE_TAG} \
|
|
||||||
--dest-creds "${{ secrets.REGISTRY_CREDS }}"
|
|
||||||
|
|
||||||
echo "Built and pushed ${IMAGE}:${IMAGE_TAG}"
|
|
||||||
|
|
||||||
- name: Build and push ingestion image
|
- name: Build and push ingestion image
|
||||||
run: |
|
run: |
|
||||||
set -e
|
set -e
|
||||||
@@ -101,25 +78,21 @@ jobs:
|
|||||||
|
|
||||||
cd /tmp/infra-update
|
cd /tmp/infra-update
|
||||||
|
|
||||||
sed -i "s|gitea.d-ma.be/mathias/supervisor:.*|gitea.d-ma.be/mathias/supervisor:${IMAGE_TAG}|" \
|
|
||||||
"k3s/apps/${SERVICE}/deployment.yaml"
|
|
||||||
|
|
||||||
sed -i "s|gitea.d-ma.be/mathias/ingestion:.*|gitea.d-ma.be/mathias/ingestion:${IMAGE_TAG}|" \
|
sed -i "s|gitea.d-ma.be/mathias/ingestion:.*|gitea.d-ma.be/mathias/ingestion:${IMAGE_TAG}|" \
|
||||||
"k3s/apps/${SERVICE}/ingestion-deployment.yaml"
|
"k3s/apps/supervisor/ingestion-deployment.yaml"
|
||||||
|
|
||||||
sed -i "s|gitea.d-ma.be/mathias/routing:.*|gitea.d-ma.be/mathias/routing:${IMAGE_TAG}|" \
|
sed -i "s|gitea.d-ma.be/mathias/routing:.*|gitea.d-ma.be/mathias/routing:${IMAGE_TAG}|" \
|
||||||
"k3s/apps/routing/deployment.yaml"
|
"k3s/apps/routing/deployment.yaml"
|
||||||
|
|
||||||
git config user.email "cd-bot@d-ma.be"
|
git config user.email "cd-bot@d-ma.be"
|
||||||
git config user.name "CD Bot"
|
git config user.name "CD Bot"
|
||||||
git add "k3s/apps/${SERVICE}/deployment.yaml" \
|
git add "k3s/apps/supervisor/ingestion-deployment.yaml" \
|
||||||
"k3s/apps/${SERVICE}/ingestion-deployment.yaml" \
|
|
||||||
"k3s/apps/routing/deployment.yaml"
|
"k3s/apps/routing/deployment.yaml"
|
||||||
git commit -m "chore(deploy): supervisor+ingestion+routing → ${IMAGE_TAG}"
|
git commit -m "chore(deploy): ingestion+routing → ${IMAGE_TAG}"
|
||||||
GIT_SSH_COMMAND="ssh -i ~/.ssh/infra_deploy_key -o IdentitiesOnly=yes" \
|
GIT_SSH_COMMAND="ssh -i ~/.ssh/infra_deploy_key -o IdentitiesOnly=yes" \
|
||||||
git push
|
git push
|
||||||
|
|
||||||
echo "Infra repo updated: ${SERVICE}+ingestion → ${IMAGE_TAG}"
|
echo "Infra repo updated: ingestion+routing → ${IMAGE_TAG}"
|
||||||
|
|
||||||
- name: Trigger Flux reconcile (immediate)
|
- name: Trigger Flux reconcile (immediate)
|
||||||
run: |
|
run: |
|
||||||
@@ -128,23 +101,6 @@ jobs:
|
|||||||
kubectl -n flux-system annotate kustomization apps \
|
kubectl -n flux-system annotate kustomization apps \
|
||||||
reconcile.fluxcd.io/requestedAt="$(date +%s)" --overwrite
|
reconcile.fluxcd.io/requestedAt="$(date +%s)" --overwrite
|
||||||
|
|
||||||
- name: Wait for Flux to apply new supervisor image
|
|
||||||
run: |
|
|
||||||
EXPECTED="gitea.d-ma.be/mathias/supervisor:${{ github.sha }}"
|
|
||||||
for i in $(seq 1 60); do
|
|
||||||
CURRENT=$(kubectl get deploy supervisor -n supervisor \
|
|
||||||
-o jsonpath='{.spec.template.spec.containers[0].image}' 2>/dev/null || echo "")
|
|
||||||
if [ "$CURRENT" = "$EXPECTED" ]; then
|
|
||||||
echo "✓ Flux applied supervisor image after ${i}s"
|
|
||||||
break
|
|
||||||
fi
|
|
||||||
sleep 1
|
|
||||||
done
|
|
||||||
kubectl get deploy supervisor -n supervisor \
|
|
||||||
-o jsonpath='{.spec.template.spec.containers[0].image}' \
|
|
||||||
| grep -qx "$EXPECTED" \
|
|
||||||
|| { echo "✗ Flux did not apply supervisor image within 60s"; exit 1; }
|
|
||||||
|
|
||||||
- name: Wait for Flux to apply new ingestion image
|
- name: Wait for Flux to apply new ingestion image
|
||||||
run: |
|
run: |
|
||||||
EXPECTED="gitea.d-ma.be/mathias/ingestion:${{ github.sha }}"
|
EXPECTED="gitea.d-ma.be/mathias/ingestion:${{ github.sha }}"
|
||||||
@@ -162,21 +118,6 @@ jobs:
|
|||||||
| grep -qx "$EXPECTED" \
|
| grep -qx "$EXPECTED" \
|
||||||
|| { echo "✗ Flux did not apply ingestion image within 60s"; exit 1; }
|
|| { echo "✗ Flux did not apply ingestion image within 60s"; exit 1; }
|
||||||
|
|
||||||
- name: Verify supervisor rollout
|
|
||||||
run: |
|
|
||||||
kubectl rollout status deployment/supervisor \
|
|
||||||
--namespace supervisor \
|
|
||||||
--timeout=120s \
|
|
||||||
|| {
|
|
||||||
echo "── pod status ──"
|
|
||||||
kubectl get pods -n supervisor -o wide
|
|
||||||
echo "── events ──"
|
|
||||||
kubectl get events -n supervisor --sort-by='.lastTimestamp' | tail -20
|
|
||||||
echo "── describe ──"
|
|
||||||
kubectl describe pods -n supervisor -l app=supervisor | tail -40
|
|
||||||
exit 1
|
|
||||||
}
|
|
||||||
|
|
||||||
- name: Verify ingestion rollout
|
- name: Verify ingestion rollout
|
||||||
run: |
|
run: |
|
||||||
kubectl rollout status deployment/ingestion \
|
kubectl rollout status deployment/ingestion \
|
||||||
@@ -191,3 +132,35 @@ jobs:
|
|||||||
kubectl describe pods -n supervisor -l app=ingestion | tail -40
|
kubectl describe pods -n supervisor -l app=ingestion | tail -40
|
||||||
exit 1
|
exit 1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
- name: Wait for Flux to apply new routing image
|
||||||
|
run: |
|
||||||
|
EXPECTED="gitea.d-ma.be/mathias/routing:${{ github.sha }}"
|
||||||
|
for i in $(seq 1 60); do
|
||||||
|
CURRENT=$(kubectl get deploy routing -n routing \
|
||||||
|
-o jsonpath='{.spec.template.spec.containers[0].image}' 2>/dev/null || echo "")
|
||||||
|
if [ "$CURRENT" = "$EXPECTED" ]; then
|
||||||
|
echo "✓ Flux applied routing image after ${i}s"
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
sleep 1
|
||||||
|
done
|
||||||
|
kubectl get deploy routing -n routing \
|
||||||
|
-o jsonpath='{.spec.template.spec.containers[0].image}' \
|
||||||
|
| grep -qx "$EXPECTED" \
|
||||||
|
|| { echo "✗ Flux did not apply routing image within 60s"; exit 1; }
|
||||||
|
|
||||||
|
- name: Verify routing rollout
|
||||||
|
run: |
|
||||||
|
kubectl rollout status deployment/routing \
|
||||||
|
--namespace routing \
|
||||||
|
--timeout=120s \
|
||||||
|
|| {
|
||||||
|
echo "── pod status ──"
|
||||||
|
kubectl get pods -n routing -o wide
|
||||||
|
echo "── events ──"
|
||||||
|
kubectl get events -n routing --sort-by='.lastTimestamp' | tail -20
|
||||||
|
echo "── describe ──"
|
||||||
|
kubectl describe pods -n routing -l app=routing | tail -40
|
||||||
|
exit 1
|
||||||
|
}
|
||||||
|
|||||||
@@ -1,12 +1,5 @@
|
|||||||
{
|
{
|
||||||
"mcpServers": {
|
"mcpServers": {
|
||||||
"supervisor": {
|
|
||||||
"type": "http",
|
|
||||||
"url": "https://supervisor-mcp.d-ma.be/mcp",
|
|
||||||
"headers": {
|
|
||||||
"Authorization": "Bearer ${SUPERVISOR_MCP_TOKEN}"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"brain": {
|
"brain": {
|
||||||
"type": "http",
|
"type": "http",
|
||||||
"url": "https://brain-mcp.d-ma.be/mcp",
|
"url": "https://brain-mcp.d-ma.be/mcp",
|
||||||
|
|||||||
128
AGENTS.md
128
AGENTS.md
@@ -36,9 +36,18 @@ These rules apply to every task across every project, regardless of harness.
|
|||||||
4. **Goal-driven execution.** Define clear success criteria up front for every task.
|
4. **Goal-driven execution.** Define clear success criteria up front for every task.
|
||||||
Loop — implement, verify, refine — until those criteria are met. Don't claim
|
Loop — implement, verify, refine — until those criteria are met. Don't claim
|
||||||
completion without evidence (tests pass, command output, observed behavior).
|
completion without evidence (tests pass, command output, observed behavior).
|
||||||
5. **Branch-per-task for multi-agent repos.** When another agent may be active on
|
5. **Trunk-Based Development — commit directly to main.** Every commit is one
|
||||||
the same repo, create a branch (`agent/<description>`), commit there, and open a
|
logical change (one tool, one fix, one test) with passing tests. Main is always
|
||||||
PR. Do not merge without explicit instruction from Mathias.
|
deployable. Never create long-lived feature branches.
|
||||||
|
|
||||||
|
**Exception — parallel agents on same repo:** If another agent is known to be
|
||||||
|
actively working on the same repo simultaneously, create a short-lived branch
|
||||||
|
(`agent/<description>`), finish the task, and merge to main within the same
|
||||||
|
session. Do not leave agent branches open between sessions.
|
||||||
|
|
||||||
|
**Exception — external contributor or client four-eyes requirement:** Use
|
||||||
|
PR flow only when a human reviewer outside the project is required. Document
|
||||||
|
the reason in PROJECT.md.
|
||||||
|
|
||||||
## Default stack
|
## Default stack
|
||||||
|
|
||||||
@@ -49,9 +58,10 @@ These rules apply to every task across every project, regardless of harness.
|
|||||||
| Build | Task (taskfile.dev) | Make | — |
|
| Build | Task (taskfile.dev) | Make | — |
|
||||||
| Containers | Docker Compose (dev), k3s (prod) | — | — |
|
| Containers | Docker Compose (dev), k3s (prod) | — | — |
|
||||||
| DB | PostgreSQL + sqlc | SQLite | — |
|
| DB | PostgreSQL + sqlc | SQLite | — |
|
||||||
| Search | Qdrant (vector), BM25 | — | — |
|
| Search | pgvector (vector), BM25 | Qdrant (when >1M vectors or hybrid retrieval) | — |
|
||||||
| Logging | slog (structured) | — | — |
|
| Logging | slog (structured) | — | — |
|
||||||
| Testing | Table-driven, testify | — | — |
|
| Testing | Table-driven, testify | — | — |
|
||||||
|
| Agents (Go) | google.golang.org/adk + pkg/litellm adapter | — | — |
|
||||||
|
|
||||||
Exploratory: Rust, Zig — I'll tell you when I want these.
|
Exploratory: Rust, Zig — I'll tell you when I want these.
|
||||||
|
|
||||||
@@ -61,9 +71,12 @@ Exploratory: Rust, Zig — I'll tell you when I want these.
|
|||||||
- **Errors**: `fmt.Errorf("operation: %w", err)` — never naked, never log-and-return
|
- **Errors**: `fmt.Errorf("operation: %w", err)` — never naked, never log-and-return
|
||||||
- **Naming**: stdlib conventions, no stuttering
|
- **Naming**: stdlib conventions, no stuttering
|
||||||
- **Architecture**: prefer stdlib over frameworks, constructor injection, env-var config parsed into typed structs
|
- **Architecture**: prefer stdlib over frameworks, constructor injection, env-var config parsed into typed structs
|
||||||
- **Git**: conventional commits (`feat:`, `fix:`, `chore:`), one concern per PR, PR describes *why* not *what*
|
- **Git**: conventional commits (`feat:`, `fix:`, `chore:`), commit directly to main,
|
||||||
|
one logical change per commit, CI is the quality gate
|
||||||
|
- **Never**: long-lived feature branches, PRs for solo work, direct push without
|
||||||
|
passing `task check` locally first
|
||||||
- **Security**: no secrets in code, govulncheck before adding deps, SOPS for encrypted config
|
- **Security**: no secrets in code, govulncheck before adding deps, SOPS for encrypted config
|
||||||
- **Dependencies**: prefer stdlib. testify, slog, templ, sqlc are pre-approved; anything else needs justification in the commit message
|
- **Dependencies**: prefer stdlib. testify, slog, templ, sqlc, google.golang.org/adk (agent projects only) are pre-approved; anything else needs justification in the commit message
|
||||||
|
|
||||||
## Infrastructure
|
## Infrastructure
|
||||||
|
|
||||||
@@ -71,7 +84,7 @@ Three machines on Tailscale:
|
|||||||
|
|
||||||
| Machine | Role | Key specs |
|
| Machine | Role | Key specs |
|
||||||
|---------|------|-----------|
|
|---------|------|-----------|
|
||||||
| koala | GPU inference, heavy compute | RTX 5070, runs llama-swap, Qdrant |
|
| koala | GPU inference, heavy compute | RTX 5070, runs k3s + llama-swap + shared postgres18/pgvector |
|
||||||
| iguana | Services, builds | M2 Ultra Mac |
|
| iguana | Services, builds | M2 Ultra Mac |
|
||||||
| flamingo | Daily driver, edge | Mac mini, ~/dev is here |
|
| flamingo | Daily driver, edge | Mac mini, ~/dev is here |
|
||||||
|
|
||||||
@@ -103,18 +116,64 @@ See `~/dev/PROJECT_SUMMARY.md` for detailed descriptions of each project.
|
|||||||
- **koala-ai-stack** (`AGENTS/`) — local AI server infrastructure management
|
- **koala-ai-stack** (`AGENTS/`) — local AI server infrastructure management
|
||||||
- **klimatkollen** (`XT/`) — Swedish municipal climate data platform
|
- **klimatkollen** (`XT/`) — Swedish municipal climate data platform
|
||||||
|
|
||||||
## Knowledge base
|
## Knowledge base — actively use it
|
||||||
|
|
||||||
When available, agents can query the shared knowledge base:
|
A persistent brain (BM25 search + LLM-synthesised Q&A) survives across sessions,
|
||||||
|
hosts, and harnesses. It holds 100+ hard-won entries: infra incident postmortems,
|
||||||
|
Go pitfalls, framework gotchas, design principles, ADRs. **It is not optional
|
||||||
|
reference material — query it actively, not just when explicitly told.**
|
||||||
|
|
||||||
- **MCP**: `mcp://hyperguild.<TAILNET>.ts.net:3100/knowledge`
|
### When to query (treat as a reflex)
|
||||||
- **HTTP**: `http://hyperguild.<TAILNET>.ts.net:3100/api/v1/search`
|
|
||||||
|
|
||||||
<!-- TODO: replace <TAILNET> placeholder with the real Tailscale tailnet
|
- **Before** starting a non-trivial task — search for prior art with the symptom
|
||||||
name once hyperguild is deployed. Until then, agents that try to
|
AND the system component ("how did we solve X in Y?"). 5 seconds beats 5 hours.
|
||||||
reach the knowledge service on a host where it isn't running will
|
- **When debugging** — search for the error string, the stack frame, the affected
|
||||||
get DNS NXDOMAIN, which is the desired fail-loudly behavior. -->
|
service. Past you may have already paid this tax.
|
||||||
- **Scoping**: defaults to `public` collection; client projects filter to `{client}` + `public`
|
- **Before adopting** a pattern, library, framework, or model name — check if it
|
||||||
|
was tried and rejected, or what the integration footguns are.
|
||||||
|
- **When making architectural decisions** — search for the domain + "ADR" or
|
||||||
|
"decision" to find prior reasoning before re-deriving it.
|
||||||
|
- **When a recommendation feels novel** — challenge yourself: "has this been
|
||||||
|
documented?" The brain often has it.
|
||||||
|
|
||||||
|
### When to write
|
||||||
|
|
||||||
|
After you discover something that **future-you would forget** and that **isn't
|
||||||
|
recoverable from the code, git log, or PR description alone**:
|
||||||
|
|
||||||
|
- Bugs whose root cause is non-obvious and generalisable beyond this project.
|
||||||
|
- Framework / library / model-name quirks that bit you and would bite anyone.
|
||||||
|
- Design principles validated under fire (e.g. "every `_get` needs a `_list`").
|
||||||
|
- Postmortems for incidents: what broke, why, how diagnosed, what to do next time.
|
||||||
|
|
||||||
|
DON'T write project status, sprint progress, PR summaries, or "what I did this
|
||||||
|
session" — those rot fast and the originals are in git/gitea anyway. Brain
|
||||||
|
entries that age well are about *why*, *how to avoid*, and *what to do when*.
|
||||||
|
|
||||||
|
### How to access (per harness)
|
||||||
|
|
||||||
|
| Harness | Query | Write |
|
||||||
|
|---------|-------|-------|
|
||||||
|
| **Claude Code, Claude Desktop** | `brain_query` (BM25), `brain_answer` (LLM-synth + sources) MCP tools | `brain_write` MCP tool |
|
||||||
|
| **Crush, Pi, Antigravity, other MCP-capable** | same MCP server: `ingestion-brain` (via the `mcp__*_brain__*` namespace once authenticated) | same |
|
||||||
|
| **Anything HTTP-only (curl, scripts)** | `POST https://brain-mcp.d-ma.be/query` with `{"query":"..."}` (auth via `BRAIN_MCP_TOKEN`) | `POST .../write` with `{"content":"...","filename":"..."}` |
|
||||||
|
| **Browser / human inspection** | `https://gitea.d-ma.be/mathias/hyperguild` → `knowledge/` and `wiki/` markdown files | — |
|
||||||
|
|
||||||
|
- **Scoping**: defaults to `public` collection; client projects filter to `{client}` + `public`.
|
||||||
|
- **Routing**: `brain_answer`'s LLM uses berget.ai as primary, iguana ollama as
|
||||||
|
fallback. Both are configurable in the `supervisor/ingestion-deployment.yaml`
|
||||||
|
on the koala k3s cluster; don't hardcode local-only model names into the
|
||||||
|
berget URL (see knowledge entry on namespace mismatches).
|
||||||
|
|
||||||
|
### Quick reflex checks
|
||||||
|
|
||||||
|
If you find yourself about to say any of these out loud, you owe yourself a brain query first:
|
||||||
|
|
||||||
|
- "I think the issue might be..."
|
||||||
|
- "Let me try X and see..."
|
||||||
|
- "I'll just write a script to..."
|
||||||
|
- "This is probably a new bug..."
|
||||||
|
- "Has anyone done this before?" — *yes, probably, go check.*
|
||||||
|
|
||||||
## Client work rules
|
## Client work rules
|
||||||
|
|
||||||
@@ -221,31 +280,28 @@ Key skills:
|
|||||||
|
|
||||||
## MCP endpoints
|
## MCP endpoints
|
||||||
|
|
||||||
Two MCP servers expose this project's tooling, both reachable over Tailscale:
|
Two MCP servers are live, both reachable over Tailscale and via HTTPS domain:
|
||||||
|
|
||||||
- **`brain`** at `http://koala:30330/mcp` — preferred path for `brain_query`,
|
- **`brain`** at `https://brain-mcp.d-ma.be/mcp` (NodePort `koala:30330`) —
|
||||||
`brain_write`, `brain_ingest`, `brain_ingest_raw`, and `session_log`. Hosted
|
`brain_query`, `brain_write`, `brain_ingest`, `brain_ingest_raw`,
|
||||||
by the ingestion service directly.
|
`brain_answer`, `brain_classify`, `session_log`. Hosted by the ingestion
|
||||||
- **`supervisor`** at `http://koala:30320/mcp` — skill workers (`tdd_red`,
|
service. Auth: Dex JWT (claude.ai OAuth) or static `BRAIN_MCP_TOKEN`.
|
||||||
`tdd_green`, `tdd_refactor`, `review`, `debug`, `spec`, `retrospective`,
|
|
||||||
`trainer`, `tier`). Will shrink as skill workers move to SKILL.md in a later
|
|
||||||
migration.
|
|
||||||
- **`routing`** at `http://koala:30310/mcp` — Mode 2 routing pod. Advertises
|
- **`routing`** at `http://koala:30310/mcp` — Mode 2 routing pod. Advertises
|
||||||
the same four cost-routable skills as the supervisor (`review`, `debug`,
|
`review`, `debug`, `retrospective`, `trainer`; per-call routes to local model
|
||||||
`retrospective`, `trainer`) but per-call decides whether to use a local
|
or Claude based on brain `/pass-rate`. Bearer auth via `ROUTING_MCP_TOKEN`
|
||||||
model or Claude based on the brain's `/pass-rate` response. Bearer auth
|
(opt-in). Only `mode client-local` registers this endpoint.
|
||||||
via `ROUTING_MCP_TOKEN` (opt-in). Only `mode client-local` registers this
|
|
||||||
endpoint; Mode 1 and Mode 3 do not.
|
The supervisor MCP (`koala:30320`) was retired in Plan 7 (2026-05-12). Its
|
||||||
|
skill workers (`tdd`, `spec`) are now SKILL.md files; routed skills moved to
|
||||||
|
the routing pod; brain tools moved to the brain MCP.
|
||||||
|
|
||||||
The brain HTTP REST API (`/query`, `/write`, `/ingest`, `/ingest-raw`,
|
The brain HTTP REST API (`/query`, `/write`, `/ingest`, `/ingest-raw`,
|
||||||
`/ingest-path`, `/backfill-refs`) remains available on the same port (3300) for
|
`/ingest-path`, `/backfill-refs`, `/pass-rate`) remains available on port 3300
|
||||||
shell scripts and non-MCP clients.
|
for shell scripts and non-MCP clients.
|
||||||
|
|
||||||
The brain HTTP REST API also serves a read-only `GET /pass-rate?skill=X&window=Y`
|
`brain_answer(query)` performs BM25 retrieval + LLM synthesis (berget.ai
|
||||||
endpoint that aggregates `final_status` counts from session logs and returns
|
gemma4:31b → iguana fallback). `brain_classify(text)` infers doc type, title,
|
||||||
`{skill, window, pass, fail, skip, total, pass_rate}`. Plan 6 (routing pod)
|
and tags. Both require `BRAIN_LLM_PRIMARY_URL` to be set in the ingestion pod.
|
||||||
reads this to decide whether to route skill calls to local models. Pass rate
|
|
||||||
is `null` when no logged invocations are in the window.
|
|
||||||
|
|
||||||
## Agent instructions
|
## Agent instructions
|
||||||
|
|
||||||
|
|||||||
37
CLAUDE.md
37
CLAUDE.md
@@ -47,31 +47,28 @@
|
|||||||
|
|
||||||
## MCP endpoints
|
## MCP endpoints
|
||||||
|
|
||||||
Two MCP servers expose this project's tooling, both reachable over Tailscale:
|
Two MCP servers are live, both reachable over Tailscale and via HTTPS domain:
|
||||||
|
|
||||||
- **`brain`** at `http://koala:30330/mcp` — preferred path for `brain_query`,
|
- **`brain`** at `https://brain-mcp.d-ma.be/mcp` (NodePort `koala:30330`) —
|
||||||
`brain_write`, `brain_ingest`, `brain_ingest_raw`, and `session_log`. Hosted
|
`brain_query`, `brain_write`, `brain_ingest`, `brain_ingest_raw`,
|
||||||
by the ingestion service directly.
|
`brain_answer`, `brain_classify`, `session_log`. Hosted by the ingestion
|
||||||
- **`supervisor`** at `http://koala:30320/mcp` — skill workers (`tdd_red`,
|
service. Auth: Dex JWT (claude.ai OAuth) or static `BRAIN_MCP_TOKEN`.
|
||||||
`tdd_green`, `tdd_refactor`, `review`, `debug`, `spec`, `retrospective`,
|
|
||||||
`trainer`, `tier`). Will shrink as skill workers move to SKILL.md in a later
|
|
||||||
migration.
|
|
||||||
- **`routing`** at `http://koala:30310/mcp` — Mode 2 routing pod. Advertises
|
- **`routing`** at `http://koala:30310/mcp` — Mode 2 routing pod. Advertises
|
||||||
the same four cost-routable skills as the supervisor (`review`, `debug`,
|
`review`, `debug`, `retrospective`, `trainer`; routes each call to a local model
|
||||||
`retrospective`, `trainer`) but per-call decides whether to use a local
|
or Claude based on brain `/pass-rate`. Bearer auth via `ROUTING_MCP_TOKEN`
|
||||||
model or Claude based on the brain's `/pass-rate` response. Bearer auth
|
(opt-in). Only `mode client-local` registers this endpoint.
|
||||||
via `ROUTING_MCP_TOKEN` (opt-in). Only `mode client-local` registers this
|
|
||||||
endpoint; Mode 1 and Mode 3 do not.
|
The supervisor MCP (`koala:30320`) was retired in Plan 7 (2026-05-12). Its
|
||||||
|
skill workers (`tdd`, `spec`) are now SKILL.md files; routed skills moved to
|
||||||
|
the routing pod; brain tools moved to the brain MCP.
|
||||||
|
|
||||||
The brain HTTP REST API (`/query`, `/write`, `/ingest`, `/ingest-raw`,
|
The brain HTTP REST API (`/query`, `/write`, `/ingest`, `/ingest-raw`,
|
||||||
`/ingest-path`, `/backfill-refs`) remains available on the same port (3300) for
|
`/ingest-path`, `/backfill-refs`, `/pass-rate`) remains available on port 3300
|
||||||
shell scripts and non-MCP clients.
|
for shell scripts and non-MCP clients.
|
||||||
|
|
||||||
The brain HTTP REST API also serves a read-only `GET /pass-rate?skill=X&window=Y`
|
`brain_answer(query)` performs BM25 retrieval + LLM synthesis (berget.ai
|
||||||
endpoint that aggregates `final_status` counts from session logs and returns
|
gemma4:31b → iguana fallback). `brain_classify(text)` infers doc type, title,
|
||||||
`{skill, window, pass, fail, skip, total, pass_rate}`. Plan 6 (routing pod)
|
and tags. Both require `BRAIN_LLM_PRIMARY_URL` to be set in the ingestion pod.
|
||||||
reads this to decide whether to route skill calls to local models. Pass rate
|
|
||||||
is `null` when no logged invocations are in the window.
|
|
||||||
|
|
||||||
## Agent instructions
|
## Agent instructions
|
||||||
|
|
||||||
|
|||||||
98
DECISIONS.md
98
DECISIONS.md
@@ -72,23 +72,42 @@ Record *why* things are the way they are. Future-you will thank present-you.
|
|||||||
Plan 6 (Mode 2 routing pod, 2026-05-04) introduces a second consumer of
|
Plan 6 (Mode 2 routing pod, 2026-05-04) introduces a second consumer of
|
||||||
the four cost-routable skill packages. The routing pod constructs each
|
the four cost-routable skill packages. The routing pod constructs each
|
||||||
skill via `<pkg>.New(Config{...})` and hands it `routing.Router.Run` as
|
skill via `<pkg>.New(Config{...})` and hands it `routing.Router.Run` as
|
||||||
the `CompleteFunc`. Plan 7 (supervisor retirement) MUST NOT delete the
|
the `CompleteFunc`.
|
||||||
four packages.
|
|
||||||
|
|
||||||
**Plan 7's allowed deletions:**
|
**Preserved code (do not delete):**
|
||||||
- `internal/skills/{tdd,spec,tier}/` (not consumed by the routing pod)
|
|
||||||
- `cmd/supervisor/` (binary)
|
|
||||||
- `Dockerfile` (supervisor's, at repo root — distinct from `Dockerfile.routing`)
|
|
||||||
- supervisor manifests in the infra repo
|
|
||||||
- NodePort `:30320`
|
|
||||||
|
|
||||||
**Plan 7's preserved code:**
|
|
||||||
- `internal/skills/{review,debug,retrospective,trainer}/`
|
- `internal/skills/{review,debug,retrospective,trainer}/`
|
||||||
- `internal/registry`
|
- `internal/registry`, `internal/mcp`, `internal/exec/litellm.go`
|
||||||
- `internal/mcp`
|
- `internal/routing/`, `cmd/routing/`
|
||||||
- `internal/exec/litellm.go`
|
|
||||||
- `internal/routing/` (entirely new in Plan 6)
|
---
|
||||||
- `cmd/routing/`
|
|
||||||
|
## Plan 7: supervisor pod retired (2026-05-12)
|
||||||
|
|
||||||
|
**What was deleted:** `cmd/supervisor/`, `internal/skills/{tdd,spec}/`,
|
||||||
|
root `Dockerfile`, supervisor k8s manifests (Deployment, Service, Ingress,
|
||||||
|
NodePort 30320), `supervisor` entry removed from all `.mcp.json` configs.
|
||||||
|
|
||||||
|
**Coverage:** `tdd`/`spec` → SKILL.md files in `~/dev/.skills/`; `review`,
|
||||||
|
`debug`, `retrospective`, `trainer` → routing pod; `brain_*`/`session_log` →
|
||||||
|
brain MCP; `tier` → `hyperguild tier` CLI.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2026-05-12 — brain_answer and brain_classify: LLM routing via berget.ai → iguana
|
||||||
|
|
||||||
|
**Context:** Brain MCP returned raw BM25 excerpts with no synthesis. Adding
|
||||||
|
LLM-backed tools enables Q&A and ingestion enrichment without a separate service.
|
||||||
|
|
||||||
|
**Decision:** Two new MCP tools in the ingestion service (`ingestion/internal/mcp/`):
|
||||||
|
- `brain_answer(query)` — BM25 top-10 → LLM synthesis → answer + sources
|
||||||
|
- `brain_classify(text)` — LLM classifies doc into type/title/tags
|
||||||
|
|
||||||
|
Primary LLM: berget.ai `gemma4:31b` (EU cloud, spend tokens while available).
|
||||||
|
Fallback: iguana `gemma4:31b` (local Ollama). Reranker deferred to follow-up.
|
||||||
|
Router lives in `ingestion/internal/llm.Router`; opt-in via `BRAIN_LLM_PRIMARY_URL`.
|
||||||
|
|
||||||
|
**Consequences:** Brain becomes a knowledge assistant, not just a search index.
|
||||||
|
When berget.ai tokens run out, flip `BRAIN_LLM_PRIMARY_URL` to iguana.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
@@ -99,3 +118,52 @@ four packages.
|
|||||||
**Decision**: The root context-sync generates a `mathias.md` prompt and `mathias.toml` agent config in `~/.vibe/`. This is the one tool that needs a custom adapter path.
|
**Decision**: The root context-sync generates a `mathias.md` prompt and `mathias.toml` agent config in `~/.vibe/`. This is the one tool that needs a custom adapter path.
|
||||||
|
|
||||||
**Consequences**: Run `vibe --agent mathias` to use your conventions. Other Vibe users on the machine aren't affected.
|
**Consequences**: Run `vibe --agent mathias` to use your conventions. Other Vibe users on the machine aren't affected.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2026-05-18 — project_create commits staging namespace directly to infra main
|
||||||
|
|
||||||
|
**Context:** `project_create` writes a k8s namespace manifest into the infra
|
||||||
|
repo so Flux brings up a staging environment for the new project. Initial
|
||||||
|
implementation pushed to a `staging/<name>` branch, which required manual PR
|
||||||
|
merge before Flux saw the namespace — defeating the "one tool call, project
|
||||||
|
exists, staging reconciling within 60s" goal.
|
||||||
|
|
||||||
|
**Decision:** Option A — commit directly to `main`. `callInfraCommit` passes
|
||||||
|
`branch: "main"` to gitea-mcp's `file_write_branch`; no PR, no merge step.
|
||||||
|
|
||||||
|
**Consequences:** Staging namespace appears in cluster within ~60s of the
|
||||||
|
`project_create` call. Consistent with the project-wide Trunk-Based Development (TBD) policy (CLAUDE.md):
|
||||||
|
commit directly to main, every commit deployable. Acceptable because the
|
||||||
|
manifest is a fresh namespace under `k3s/staging/<name>/` — isolated, low
|
||||||
|
blast-radius, and Flux will simply recreate it if the file is bad. Manual
|
||||||
|
review gating was friction for no compensating safety gain on experiment
|
||||||
|
namespaces.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2026-05-18 — pgvector over Qdrant for brain hybrid retrieval (supersedes 2026-04-08)
|
||||||
|
|
||||||
|
**Context:** The 2026-04-08 ADR chose Qdrant for vector store. Since then,
|
||||||
|
postgres18 with pgvector has been deployed in the `databases` namespace on
|
||||||
|
koala and is already the shared default for the rest of the project
|
||||||
|
(CLAUDE.md lists `pgvector (vector), BM25` as the primary search layer and
|
||||||
|
Qdrant only as a fallback "when >1M vectors or hybrid retrieval"). Qdrant
|
||||||
|
itself has never been deployed — `kubectl get` finds no pod, service, or
|
||||||
|
manifest. Standing up a new vector engine for a single consumer is friction
|
||||||
|
that the original ADR did not weigh.
|
||||||
|
|
||||||
|
**Decision:** Use pgvector for brain hybrid retrieval. Issue #8 — and any
|
||||||
|
follow-on embedding work — targets the existing `postgres18` instance:
|
||||||
|
|
||||||
|
- one table `brain_embeddings(path TEXT PRIMARY KEY, embedding VECTOR(768), updated_at TIMESTAMPTZ)`,
|
||||||
|
IVFFlat or HNSW index by feel once volume warrants
|
||||||
|
- BM25 stays as today (file walk + token frequency); cosine via pgvector
|
||||||
|
- hybrid scoring done in SQL or Go; pick once we measure
|
||||||
|
- nomic-embed-text on iguana ollama provides 768-dim vectors
|
||||||
|
|
||||||
|
**Consequences:** One database engine instead of two. Backups, monitoring,
|
||||||
|
and connection pooling already solved. Trade-off: pgvector at >1M vectors
|
||||||
|
or under hybrid-search load may underperform Qdrant — revisit only when
|
||||||
|
benchmarks hurt. The 2026-04-08 ADR is superseded for the brain use case;
|
||||||
|
Qdrant remains the noted fallback path in CLAUDE.md if scale demands it.
|
||||||
|
|||||||
50
Dockerfile
50
Dockerfile
@@ -1,50 +0,0 @@
|
|||||||
# syntax=docker/dockerfile:1
|
|
||||||
|
|
||||||
# ── Build stage ───────────────────────────────────────────────────────────────
|
|
||||||
FROM golang:1.26-bookworm AS builder
|
|
||||||
|
|
||||||
ARG VERSION=dev
|
|
||||||
WORKDIR /src
|
|
||||||
|
|
||||||
COPY go.mod go.sum ./
|
|
||||||
RUN go mod download
|
|
||||||
|
|
||||||
COPY . .
|
|
||||||
RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 \
|
|
||||||
go build -trimpath -ldflags="-s -w -X main.version=${VERSION}" \
|
|
||||||
-o /out/supervisor ./cmd/supervisor
|
|
||||||
|
|
||||||
# ── Runtime stage ─────────────────────────────────────────────────────────────
|
|
||||||
# Node.js 22 slim — needed for claude CLI subprocess
|
|
||||||
FROM node:22-slim
|
|
||||||
|
|
||||||
# Install claude CLI (provides the `claude` binary the supervisor shells out to)
|
|
||||||
RUN npm install -g @anthropic-ai/claude-code \
|
|
||||||
&& claude --version \
|
|
||||||
&& echo "claude CLI installed"
|
|
||||||
|
|
||||||
# Copy supervisor binary
|
|
||||||
COPY --from=builder /out/supervisor /usr/local/bin/supervisor
|
|
||||||
|
|
||||||
# Bake in config (models.yaml + skill discipline files)
|
|
||||||
COPY config/ /app/config/
|
|
||||||
|
|
||||||
# Run as non-root
|
|
||||||
RUN groupadd -r supervisor && useradd -r -g supervisor -d /app supervisor
|
|
||||||
|
|
||||||
WORKDIR /app
|
|
||||||
|
|
||||||
# brain/ is writable state — mount a PersistentVolume here
|
|
||||||
VOLUME /app/brain
|
|
||||||
|
|
||||||
ENV SUPERVISOR_CONFIG_DIR=/app/config/supervisor
|
|
||||||
ENV SUPERVISOR_MODELS_FILE=/app/config/models.yaml
|
|
||||||
ENV SUPERVISOR_BRAIN_DIR=/app/brain
|
|
||||||
ENV SUPERVISOR_SESSIONS_DIR=/app/brain/sessions
|
|
||||||
ENV SUPERVISOR_PORT=3200
|
|
||||||
|
|
||||||
USER supervisor
|
|
||||||
|
|
||||||
EXPOSE 3200
|
|
||||||
|
|
||||||
ENTRYPOINT ["/usr/local/bin/supervisor"]
|
|
||||||
@@ -14,12 +14,16 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/mathiasbq/supervisor/internal/auth"
|
||||||
"github.com/mathiasbq/supervisor/internal/config"
|
"github.com/mathiasbq/supervisor/internal/config"
|
||||||
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
||||||
|
"github.com/mathiasbq/supervisor/internal/githubclient"
|
||||||
"github.com/mathiasbq/supervisor/internal/mcp"
|
"github.com/mathiasbq/supervisor/internal/mcp"
|
||||||
|
"github.com/mathiasbq/supervisor/internal/mcpclient"
|
||||||
"github.com/mathiasbq/supervisor/internal/registry"
|
"github.com/mathiasbq/supervisor/internal/registry"
|
||||||
"github.com/mathiasbq/supervisor/internal/routing"
|
"github.com/mathiasbq/supervisor/internal/routing"
|
||||||
"github.com/mathiasbq/supervisor/internal/skills/debug"
|
"github.com/mathiasbq/supervisor/internal/skills/debug"
|
||||||
|
"github.com/mathiasbq/supervisor/internal/skills/project"
|
||||||
"github.com/mathiasbq/supervisor/internal/skills/retrospective"
|
"github.com/mathiasbq/supervisor/internal/skills/retrospective"
|
||||||
"github.com/mathiasbq/supervisor/internal/skills/review"
|
"github.com/mathiasbq/supervisor/internal/skills/review"
|
||||||
"github.com/mathiasbq/supervisor/internal/skills/trainer"
|
"github.com/mathiasbq/supervisor/internal/skills/trainer"
|
||||||
@@ -98,13 +102,56 @@ func main() {
|
|||||||
CompleteFunc: trainer.CompleteFunc(wrap("trainer")),
|
CompleteFunc: trainer.CompleteFunc(wrap("trainer")),
|
||||||
}))
|
}))
|
||||||
|
|
||||||
srv := mcp.NewServer(reg, cfg.MCPAuthToken)
|
if cfg.GiteaMCPURL != "" {
|
||||||
|
mcpC, err := mcpclient.New(cfg.GiteaMCPURL, cfg.GiteaMCPToken)
|
||||||
|
if err != nil {
|
||||||
|
logger.Error("mcpclient init for project_create — GITEA_MCP_URL is set but GITEA_MCP_TOKEN is empty (check routing-secrets)", "err", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
var ghClient *githubclient.Client
|
||||||
|
if cfg.GitHubPAT != "" {
|
||||||
|
ghClient = githubclient.New(cfg.GitHubPAT)
|
||||||
|
}
|
||||||
|
reg.Register(project.New(project.Config{
|
||||||
|
Client: mcpC,
|
||||||
|
GitHub: ghClient,
|
||||||
|
GiteaOwner: cfg.GiteaOwner,
|
||||||
|
GitHubOwner: cfg.GitHubOwner,
|
||||||
|
GitHubPAT: cfg.GitHubPAT,
|
||||||
|
InfraRepo: cfg.InfraRepo,
|
||||||
|
}))
|
||||||
|
logger.Info("project_create registered", "gitea_mcp_url", cfg.GiteaMCPURL,
|
||||||
|
"gitea_owner", cfg.GiteaOwner, "github_owner", cfg.GitHubOwner,
|
||||||
|
"infra_repo", cfg.InfraRepo, "github_pat_set", cfg.GitHubPAT != "")
|
||||||
|
} else {
|
||||||
|
logger.Info("project_create skipped — GITEA_MCP_URL not set")
|
||||||
|
}
|
||||||
|
|
||||||
|
var validator *auth.Validator
|
||||||
|
if dexURL := os.Getenv("DEX_ISSUER_URL"); dexURL != "" {
|
||||||
|
audience := os.Getenv("MCP_AUDIENCE")
|
||||||
|
v, err := auth.NewValidator(dexURL, audience)
|
||||||
|
if err != nil {
|
||||||
|
logger.Error("build jwt validator", "err", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
validator = v
|
||||||
|
logger.Info("jwt auth enabled", "issuer", dexURL)
|
||||||
|
}
|
||||||
|
|
||||||
|
srv := mcp.NewServer(reg, cfg.MCPAuthToken, validator)
|
||||||
mux := http.NewServeMux()
|
mux := http.NewServeMux()
|
||||||
mux.Handle("/mcp", srv)
|
mux.Handle("/mcp", srv)
|
||||||
mux.HandleFunc("/healthz", func(w http.ResponseWriter, _ *http.Request) {
|
mux.HandleFunc("/healthz", func(w http.ResponseWriter, _ *http.Request) {
|
||||||
w.WriteHeader(http.StatusOK)
|
w.WriteHeader(http.StatusOK)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
if dexURL := os.Getenv("DEX_ISSUER_URL"); dexURL != "" {
|
||||||
|
resourceURL := os.Getenv("MCP_RESOURCE_URL")
|
||||||
|
mux.HandleFunc("GET /.well-known/oauth-protected-resource",
|
||||||
|
auth.ProtectedResourceHandler(resourceURL, dexURL))
|
||||||
|
}
|
||||||
|
|
||||||
addr := ":" + cfg.Port
|
addr := ":" + cfg.Port
|
||||||
logger.Info("routing pod starting", "addr", addr,
|
logger.Info("routing pod starting", "addr", addr,
|
||||||
"fast", cfg.FastModel, "thinking", cfg.ThinkingModel,
|
"fast", cfg.FastModel, "thinking", cfg.ThinkingModel,
|
||||||
|
|||||||
@@ -4,9 +4,12 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"io"
|
"io"
|
||||||
|
"net"
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/http/httptest"
|
"net/http/httptest"
|
||||||
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
@@ -42,28 +45,33 @@ func TestRoutingPodEndToEnd(t *testing.T) {
|
|||||||
}))
|
}))
|
||||||
defer brain.Close()
|
defer brain.Close()
|
||||||
|
|
||||||
|
port := freePort(t)
|
||||||
|
addr := "127.0.0.1:" + port
|
||||||
|
baseURL := "http://" + addr
|
||||||
|
|
||||||
bin := buildRouting(t)
|
bin := buildRouting(t)
|
||||||
cmd := exec.Command(bin)
|
cmd := exec.Command(bin)
|
||||||
cmd.Env = append(cmd.Env,
|
cmd.Env = []string{
|
||||||
"ROUTING_PORT=33310",
|
"ROUTING_PORT=" + port,
|
||||||
"LITELLM_BASE_URL="+llm.URL,
|
"LITELLM_BASE_URL=" + llm.URL,
|
||||||
"LITELLM_API_KEY=stub",
|
"LITELLM_API_KEY=stub",
|
||||||
"BRAIN_URL="+brain.URL,
|
"BRAIN_URL=" + brain.URL,
|
||||||
"SUPERVISOR_CONFIG_DIR=../../config/supervisor",
|
"SUPERVISOR_CONFIG_DIR=../../config/supervisor",
|
||||||
"PATH="+osPath(),
|
"PATH=" + os.Getenv("PATH"),
|
||||||
)
|
"HOME=" + os.Getenv("HOME"),
|
||||||
|
}
|
||||||
require.NoError(t, cmd.Start())
|
require.NoError(t, cmd.Start())
|
||||||
t.Cleanup(func() { _ = cmd.Process.Kill() })
|
t.Cleanup(func() { _ = cmd.Process.Kill() })
|
||||||
|
|
||||||
require.NoError(t, waitForPort(t, "127.0.0.1:33310", 5*time.Second))
|
require.NoError(t, waitForPort(t, addr, 30*time.Second))
|
||||||
|
|
||||||
resp := mcpCall(t, "http://127.0.0.1:33310/mcp", `{"jsonrpc":"2.0","id":1,"method":"tools/list"}`)
|
resp := mcpCall(t, baseURL+"/mcp", `{"jsonrpc":"2.0","id":1,"method":"tools/list"}`)
|
||||||
assert.Contains(t, resp, `"review"`)
|
assert.Contains(t, resp, `"review"`)
|
||||||
assert.Contains(t, resp, `"debug"`)
|
assert.Contains(t, resp, `"debug"`)
|
||||||
assert.Contains(t, resp, `"retrospective"`)
|
assert.Contains(t, resp, `"retrospective"`)
|
||||||
assert.Contains(t, resp, `"trainer"`)
|
assert.Contains(t, resp, `"trainer"`)
|
||||||
|
|
||||||
resp = mcpCall(t, "http://127.0.0.1:33310/mcp", `{"jsonrpc":"2.0","id":2,"method":"tools/call","params":{"name":"review","arguments":{"project_root":"/tmp","files":["README.md"]}}}`)
|
resp = mcpCall(t, baseURL+"/mcp", `{"jsonrpc":"2.0","id":2,"method":"tools/call","params":{"name":"review","arguments":{"project_root":"/tmp","files":["README.md"]}}}`)
|
||||||
_ = resp // shape varies by skill; we only need a 200
|
_ = resp // shape varies by skill; we only need a 200
|
||||||
|
|
||||||
// Wait briefly for the async session_log to land.
|
// Wait briefly for the async session_log to land.
|
||||||
@@ -113,11 +121,15 @@ func mcpCall(t *testing.T, url, body string) string {
|
|||||||
return string(raw)
|
return string(raw)
|
||||||
}
|
}
|
||||||
|
|
||||||
func osPath() string {
|
// freePort grabs an OS-assigned TCP port and releases it. There is a small
|
||||||
for _, e := range append([]string{}, exec.Command("env").Env...) {
|
// race window before the subprocess re-binds it, but it is acceptable for
|
||||||
if strings.HasPrefix(e, "PATH=") {
|
// test isolation against a hardcoded port colliding with another test or
|
||||||
return strings.TrimPrefix(e, "PATH=")
|
// stray process.
|
||||||
}
|
func freePort(t *testing.T) string {
|
||||||
}
|
t.Helper()
|
||||||
return "/usr/bin:/bin"
|
l, err := net.Listen("tcp", "127.0.0.1:0")
|
||||||
|
require.NoError(t, err)
|
||||||
|
port := l.Addr().(*net.TCPAddr).Port
|
||||||
|
require.NoError(t, l.Close())
|
||||||
|
return strconv.Itoa(port)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,163 +0,0 @@
|
|||||||
package main
|
|
||||||
|
|
||||||
import (
|
|
||||||
"context"
|
|
||||||
"log/slog"
|
|
||||||
"net/http"
|
|
||||||
"os"
|
|
||||||
|
|
||||||
"github.com/mathiasbq/supervisor/internal/config"
|
|
||||||
iexec "github.com/mathiasbq/supervisor/internal/exec"
|
|
||||||
"github.com/mathiasbq/supervisor/internal/mcp"
|
|
||||||
"github.com/mathiasbq/supervisor/internal/registry"
|
|
||||||
"github.com/mathiasbq/supervisor/internal/skills/brain"
|
|
||||||
"github.com/mathiasbq/supervisor/internal/skills/org"
|
|
||||||
"github.com/mathiasbq/supervisor/internal/skills/retrospective"
|
|
||||||
skilldebug "github.com/mathiasbq/supervisor/internal/skills/debug"
|
|
||||||
"github.com/mathiasbq/supervisor/internal/skills/review"
|
|
||||||
"github.com/mathiasbq/supervisor/internal/skills/spec"
|
|
||||||
"github.com/mathiasbq/supervisor/internal/skills/trainer"
|
|
||||||
"github.com/mathiasbq/supervisor/internal/skills/sessionlog"
|
|
||||||
"github.com/mathiasbq/supervisor/internal/skills/tdd"
|
|
||||||
"github.com/mathiasbq/supervisor/internal/tier"
|
|
||||||
)
|
|
||||||
|
|
||||||
func main() {
|
|
||||||
logger := slog.New(slog.NewJSONHandler(os.Stdout, nil))
|
|
||||||
|
|
||||||
cfg, err := config.Load()
|
|
||||||
if err != nil {
|
|
||||||
logger.Error("load config", "err", err)
|
|
||||||
os.Exit(1)
|
|
||||||
}
|
|
||||||
|
|
||||||
models, err := config.LoadModels(cfg.ModelsFile)
|
|
||||||
if err != nil {
|
|
||||||
logger.Error("load models", "err", err)
|
|
||||||
os.Exit(1)
|
|
||||||
}
|
|
||||||
|
|
||||||
protocolsPrompt, err := os.ReadFile(cfg.ConfigDir + "/protocols.md")
|
|
||||||
if err != nil {
|
|
||||||
logger.Error("read protocols.md", "path", cfg.ConfigDir+"/protocols.md", "err", err)
|
|
||||||
os.Exit(1)
|
|
||||||
}
|
|
||||||
|
|
||||||
// prependProtocols prepends the shared protocols to a skill discipline file.
|
|
||||||
prependProtocols := func(skillPrompt []byte) string {
|
|
||||||
return string(protocolsPrompt) + "\n---\n\n" + string(skillPrompt)
|
|
||||||
}
|
|
||||||
|
|
||||||
tddPrompt, err := os.ReadFile(cfg.ConfigDir + "/tdd.md")
|
|
||||||
if err != nil {
|
|
||||||
logger.Error("read tdd.md", "path", cfg.ConfigDir+"/tdd.md", "err", err)
|
|
||||||
os.Exit(1)
|
|
||||||
}
|
|
||||||
|
|
||||||
retroPrompt, err := os.ReadFile(cfg.ConfigDir + "/retrospective.md")
|
|
||||||
if err != nil {
|
|
||||||
logger.Error("read retrospective.md", "path", cfg.ConfigDir+"/retrospective.md", "err", err)
|
|
||||||
os.Exit(1)
|
|
||||||
}
|
|
||||||
|
|
||||||
reviewPrompt, err := os.ReadFile(cfg.ConfigDir + "/review.md")
|
|
||||||
if err != nil {
|
|
||||||
logger.Error("read review.md", "path", cfg.ConfigDir+"/review.md", "err", err)
|
|
||||||
os.Exit(1)
|
|
||||||
}
|
|
||||||
|
|
||||||
debugPrompt, err := os.ReadFile(cfg.ConfigDir + "/debug.md")
|
|
||||||
if err != nil {
|
|
||||||
logger.Error("read debug.md", "path", cfg.ConfigDir+"/debug.md", "err", err)
|
|
||||||
os.Exit(1)
|
|
||||||
}
|
|
||||||
|
|
||||||
specPrompt, err := os.ReadFile(cfg.ConfigDir + "/spec.md")
|
|
||||||
if err != nil {
|
|
||||||
logger.Error("read spec.md", "path", cfg.ConfigDir+"/spec.md", "err", err)
|
|
||||||
os.Exit(1)
|
|
||||||
}
|
|
||||||
|
|
||||||
trainerReaderPrompt, err := os.ReadFile(cfg.ConfigDir + "/trainer-reader.md")
|
|
||||||
if err != nil {
|
|
||||||
logger.Error("read trainer-reader.md", "path", cfg.ConfigDir+"/trainer-reader.md", "err", err)
|
|
||||||
os.Exit(1)
|
|
||||||
}
|
|
||||||
trainerWriterPrompt, err := os.ReadFile(cfg.ConfigDir + "/trainer-writer.md")
|
|
||||||
if err != nil {
|
|
||||||
logger.Error("read trainer-writer.md", "path", cfg.ConfigDir+"/trainer-writer.md", "err", err)
|
|
||||||
os.Exit(1)
|
|
||||||
}
|
|
||||||
|
|
||||||
litellm := iexec.NewLiteLLM(cfg.LiteLLMBaseURL, cfg.LiteLLMAPIKey, 0)
|
|
||||||
|
|
||||||
tierFn := func(ctx context.Context) tier.Info {
|
|
||||||
return tier.Detect(ctx, "https://api.anthropic.com", cfg.LiteLLMBaseURL)
|
|
||||||
}
|
|
||||||
|
|
||||||
reg := registry.New()
|
|
||||||
reg.Register(tdd.New(tdd.Config{
|
|
||||||
SkillPrompt: prependProtocols(tddPrompt),
|
|
||||||
DefaultModel: models.ModelFor("tdd", ""),
|
|
||||||
CompleteFunc: litellm.Complete,
|
|
||||||
SessionsDir: cfg.SessionsDir,
|
|
||||||
IngestBaseURL: cfg.IngestBaseURL,
|
|
||||||
}))
|
|
||||||
reg.Register(brain.New(brain.Config{
|
|
||||||
IngestBaseURL: cfg.IngestBaseURL,
|
|
||||||
IngestSvcURL: cfg.IngestSvcURL,
|
|
||||||
KBRetrievalURL: cfg.KBRetrievalURL,
|
|
||||||
}))
|
|
||||||
reg.Register(org.New(org.Config{
|
|
||||||
TierFn: tierFn,
|
|
||||||
}))
|
|
||||||
reg.Register(sessionlog.New(sessionlog.Config{
|
|
||||||
SessionsDir: cfg.SessionsDir,
|
|
||||||
}))
|
|
||||||
reg.Register(retrospective.New(retrospective.Config{
|
|
||||||
SkillPrompt: prependProtocols(retroPrompt),
|
|
||||||
DefaultModel: models.ModelFor("retrospective", ""),
|
|
||||||
SessionsDir: cfg.SessionsDir,
|
|
||||||
CompleteFunc: litellm.Complete,
|
|
||||||
}))
|
|
||||||
reg.Register(review.New(review.Config{
|
|
||||||
SkillPrompt: prependProtocols(reviewPrompt),
|
|
||||||
DefaultModel: models.ModelFor("review", ""),
|
|
||||||
CompleteFunc: litellm.Complete,
|
|
||||||
SessionsDir: cfg.SessionsDir,
|
|
||||||
IngestBaseURL: cfg.IngestBaseURL,
|
|
||||||
}))
|
|
||||||
reg.Register(skilldebug.New(skilldebug.Config{
|
|
||||||
SkillPrompt: prependProtocols(debugPrompt),
|
|
||||||
DefaultModel: models.ModelFor("debug", ""),
|
|
||||||
CompleteFunc: litellm.Complete,
|
|
||||||
SessionsDir: cfg.SessionsDir,
|
|
||||||
IngestBaseURL: cfg.IngestBaseURL,
|
|
||||||
}))
|
|
||||||
reg.Register(spec.New(spec.Config{
|
|
||||||
SkillPrompt: prependProtocols(specPrompt),
|
|
||||||
DefaultModel: models.ModelFor("spec", ""),
|
|
||||||
CompleteFunc: litellm.Complete,
|
|
||||||
SessionsDir: cfg.SessionsDir,
|
|
||||||
IngestBaseURL: cfg.IngestBaseURL,
|
|
||||||
}))
|
|
||||||
reg.Register(trainer.New(trainer.Config{
|
|
||||||
ReaderPrompt: prependProtocols(trainerReaderPrompt),
|
|
||||||
WriterPrompt: prependProtocols(trainerWriterPrompt),
|
|
||||||
DefaultModel: models.ModelFor("trainer", ""),
|
|
||||||
CompleteFunc: litellm.Complete,
|
|
||||||
SessionsDir: cfg.SessionsDir,
|
|
||||||
BrainDir: cfg.BrainDir,
|
|
||||||
}))
|
|
||||||
|
|
||||||
srv := mcp.NewServer(reg, cfg.MCPAuthToken)
|
|
||||||
mux := http.NewServeMux()
|
|
||||||
mux.Handle("/mcp", srv)
|
|
||||||
|
|
||||||
addr := ":" + cfg.Port
|
|
||||||
logger.Info("supervisor starting", "addr", addr, "version", "v0.5.0")
|
|
||||||
if err := http.ListenAndServe(addr, mux); err != nil {
|
|
||||||
logger.Error("server stopped", "err", err)
|
|
||||||
os.Exit(1)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,14 +0,0 @@
|
|||||||
package main
|
|
||||||
|
|
||||||
import (
|
|
||||||
"os/exec"
|
|
||||||
"testing"
|
|
||||||
)
|
|
||||||
|
|
||||||
func TestBinaryCompiles(t *testing.T) {
|
|
||||||
cmd := exec.Command("go", "build", "./...")
|
|
||||||
out, err := cmd.CombinedOutput()
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("build failed: %s\n%s", err, out)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
17
go.mod
17
go.mod
@@ -2,10 +2,23 @@ module github.com/mathiasbq/supervisor
|
|||||||
|
|
||||||
go 1.26.1
|
go 1.26.1
|
||||||
|
|
||||||
require github.com/stretchr/testify v1.11.1
|
require (
|
||||||
|
github.com/lestrrat-go/jwx/v2 v2.1.6
|
||||||
|
github.com/stretchr/testify v1.11.1
|
||||||
|
gopkg.in/yaml.v3 v3.0.1
|
||||||
|
)
|
||||||
|
|
||||||
require (
|
require (
|
||||||
github.com/davecgh/go-spew v1.1.1 // indirect
|
github.com/davecgh/go-spew v1.1.1 // indirect
|
||||||
|
github.com/decred/dcrd/dcrec/secp256k1/v4 v4.4.0 // indirect
|
||||||
|
github.com/goccy/go-json v0.10.3 // indirect
|
||||||
|
github.com/lestrrat-go/blackmagic v1.0.3 // indirect
|
||||||
|
github.com/lestrrat-go/httpcc v1.0.1 // indirect
|
||||||
|
github.com/lestrrat-go/httprc v1.0.6 // indirect
|
||||||
|
github.com/lestrrat-go/iter v1.0.2 // indirect
|
||||||
|
github.com/lestrrat-go/option v1.0.1 // indirect
|
||||||
github.com/pmezard/go-difflib v1.0.0 // indirect
|
github.com/pmezard/go-difflib v1.0.0 // indirect
|
||||||
gopkg.in/yaml.v3 v3.0.1 // indirect
|
github.com/segmentio/asm v1.2.0 // indirect
|
||||||
|
golang.org/x/crypto v0.32.0 // indirect
|
||||||
|
golang.org/x/sys v0.31.0 // indirect
|
||||||
)
|
)
|
||||||
|
|||||||
27
go.sum
27
go.sum
@@ -1,10 +1,37 @@
|
|||||||
|
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
|
github.com/decred/dcrd/dcrec/secp256k1/v4 v4.4.0 h1:NMZiJj8QnKe1LgsbDayM4UoHwbvwDRwnI3hwNaAHRnc=
|
||||||
|
github.com/decred/dcrd/dcrec/secp256k1/v4 v4.4.0/go.mod h1:ZXNYxsqcloTdSy/rNShjYzMhyjf0LaoftYK0p+A3h40=
|
||||||
|
github.com/goccy/go-json v0.10.3 h1:KZ5WoDbxAIgm2HNbYckL0se1fHD6rz5j4ywS6ebzDqA=
|
||||||
|
github.com/goccy/go-json v0.10.3/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M=
|
||||||
|
github.com/lestrrat-go/blackmagic v1.0.3 h1:94HXkVLxkZO9vJI/w2u1T0DAoprShFd13xtnSINtDWs=
|
||||||
|
github.com/lestrrat-go/blackmagic v1.0.3/go.mod h1:6AWFyKNNj0zEXQYfTMPfZrAXUWUfTIZ5ECEUEJaijtw=
|
||||||
|
github.com/lestrrat-go/httpcc v1.0.1 h1:ydWCStUeJLkpYyjLDHihupbn2tYmZ7m22BGkcvZZrIE=
|
||||||
|
github.com/lestrrat-go/httpcc v1.0.1/go.mod h1:qiltp3Mt56+55GPVCbTdM9MlqhvzyuL6W/NMDA8vA5E=
|
||||||
|
github.com/lestrrat-go/httprc v1.0.6 h1:qgmgIRhpvBqexMJjA/PmwSvhNk679oqD1RbovdCGW8k=
|
||||||
|
github.com/lestrrat-go/httprc v1.0.6/go.mod h1:mwwz3JMTPBjHUkkDv/IGJ39aALInZLrhBp0X7KGUZlo=
|
||||||
|
github.com/lestrrat-go/iter v1.0.2 h1:gMXo1q4c2pHmC3dn8LzRhJfP1ceCbgSiT9lUydIzltI=
|
||||||
|
github.com/lestrrat-go/iter v1.0.2/go.mod h1:Momfcq3AnRlRjI5b5O8/G5/BvpzrhoFTZcn06fEOPt4=
|
||||||
|
github.com/lestrrat-go/jwx/v2 v2.1.6 h1:hxM1gfDILk/l5ylers6BX/Eq1m/pnxe9NBwW6lVfecA=
|
||||||
|
github.com/lestrrat-go/jwx/v2 v2.1.6/go.mod h1:Y722kU5r/8mV7fYDifjug0r8FK8mZdw0K0GpJw/l8pU=
|
||||||
|
github.com/lestrrat-go/option v1.0.1 h1:oAzP2fvZGQKWkvHa1/SAcFolBEca1oN+mQ7eooNBEYU=
|
||||||
|
github.com/lestrrat-go/option v1.0.1/go.mod h1:5ZHFbivi4xwXxhxY9XHDe2FHo6/Z7WWmtT7T5nBBp3I=
|
||||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||||
|
github.com/segmentio/asm v1.2.0 h1:9BQrFxC+YOHJlTlHGkTrFWf59nbL3XnCoFLTwDCI7ys=
|
||||||
|
github.com/segmentio/asm v1.2.0/go.mod h1:BqMnlJP91P8d+4ibuonYZw9mfnzI9HfxselHZr5aAcs=
|
||||||
|
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||||
|
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||||
|
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||||
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
|
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
|
||||||
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
|
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
|
||||||
|
golang.org/x/crypto v0.32.0 h1:euUpcYgM8WcP71gNpTqQCn6rC2t6ULUPiOzfWaXVVfc=
|
||||||
|
golang.org/x/crypto v0.32.0/go.mod h1:ZnnJkOaASj8g0AjIduWNlq2NRxL0PlBrbKVyZ6V/Ugc=
|
||||||
|
golang.org/x/sys v0.31.0 h1:ioabZlmFYtWhL+TRYpcnNlLwhyxaM9kWTDEmfnprqik=
|
||||||
|
golang.org/x/sys v0.31.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
|
||||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
||||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||||
|
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||||
|
|||||||
@@ -6,17 +6,53 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"net/http"
|
"net/http"
|
||||||
|
"net/url"
|
||||||
"os"
|
"os"
|
||||||
"strconv"
|
"strconv"
|
||||||
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/mathiasbq/hyperguild/ingestion/internal/api"
|
"github.com/mathiasbq/hyperguild/ingestion/internal/api"
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/auth"
|
||||||
"github.com/mathiasbq/hyperguild/ingestion/internal/llm"
|
"github.com/mathiasbq/hyperguild/ingestion/internal/llm"
|
||||||
"github.com/mathiasbq/hyperguild/ingestion/internal/mcp"
|
"github.com/mathiasbq/hyperguild/ingestion/internal/mcp"
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/embed"
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/oauth"
|
||||||
"github.com/mathiasbq/hyperguild/ingestion/internal/pipeline"
|
"github.com/mathiasbq/hyperguild/ingestion/internal/pipeline"
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/reranker"
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/search"
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/vectorstore"
|
||||||
"github.com/mathiasbq/hyperguild/ingestion/internal/watcher"
|
"github.com/mathiasbq/hyperguild/ingestion/internal/watcher"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// redactDSN parses a Postgres URL and replaces its password with `***`
|
||||||
|
// for safe inclusion in logs. Falls back to a non-leaking placeholder
|
||||||
|
// if parsing fails — we never log a raw DSN.
|
||||||
|
func redactDSN(dsn string) string {
|
||||||
|
u, err := url.Parse(dsn)
|
||||||
|
if err != nil || u.User == nil {
|
||||||
|
return "postgres://***"
|
||||||
|
}
|
||||||
|
return u.Redacted()
|
||||||
|
}
|
||||||
|
|
||||||
|
// vectorAdapter bridges *vectorstore.PGStore (returns []vectorstore.Hit)
|
||||||
|
// to the search.VectorSearcher interface (which uses []search.VectorHit).
|
||||||
|
// Kept here, not in either package, so neither has to import the other.
|
||||||
|
type vectorAdapter struct{ s *vectorstore.PGStore }
|
||||||
|
|
||||||
|
func (a vectorAdapter) Search(ctx context.Context, q []float32, limit int) ([]search.VectorHit, error) {
|
||||||
|
hits, err := a.s.Search(ctx, q, limit)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
out := make([]search.VectorHit, len(hits))
|
||||||
|
for i, h := range hits {
|
||||||
|
out[i] = search.VectorHit{Path: h.Path, Distance: h.Distance}
|
||||||
|
}
|
||||||
|
return out, nil
|
||||||
|
}
|
||||||
|
|
||||||
func envOr(key, fallback string) string {
|
func envOr(key, fallback string) string {
|
||||||
if v := os.Getenv(key); v != "" {
|
if v := os.Getenv(key); v != "" {
|
||||||
return v
|
return v
|
||||||
@@ -55,7 +91,61 @@ func main() {
|
|||||||
|
|
||||||
h := api.NewHandler(brainDir, logger, pipelineCfg)
|
h := api.NewHandler(brainDir, logger, pipelineCfg)
|
||||||
|
|
||||||
mcpSrv := mcp.NewServer(brainDir, &pipelineCfg, llmClient.Complete)
|
var answerComplete pipeline.CompleteFunc
|
||||||
|
if primaryURL := os.Getenv("BRAIN_LLM_PRIMARY_URL"); primaryURL != "" {
|
||||||
|
primaryModel := envOr("BRAIN_LLM_PRIMARY_MODEL", "gemma4:31b")
|
||||||
|
primaryKey := os.Getenv("BERGET_API_KEY")
|
||||||
|
timeoutMS := envInt("BRAIN_LLM_TIMEOUT_MS", 10000)
|
||||||
|
timeout := time.Duration(timeoutMS) * time.Millisecond
|
||||||
|
|
||||||
|
primary := llm.New(primaryURL, primaryKey, primaryModel, timeout)
|
||||||
|
router := &llm.Router{Primary: primary}
|
||||||
|
|
||||||
|
if fallbackURL := os.Getenv("BRAIN_LLM_FALLBACK_URL"); fallbackURL != "" {
|
||||||
|
fallbackModel := envOr("BRAIN_LLM_FALLBACK_MODEL", "gemma4:31b")
|
||||||
|
router.Fallback = llm.New(fallbackURL, "", fallbackModel, timeout)
|
||||||
|
}
|
||||||
|
answerComplete = router.Complete
|
||||||
|
logger.Info("brain answer LLM configured", "primary", primaryURL, "model", primaryModel)
|
||||||
|
}
|
||||||
|
|
||||||
|
mcpSrv := mcp.NewServer(brainDir, &pipelineCfg, llmClient.Complete, answerComplete)
|
||||||
|
if rerankURL := os.Getenv("BRAIN_RERANKER_URL"); rerankURL != "" {
|
||||||
|
rerankModel := envOr("BRAIN_RERANKER_MODEL", "dengcao/Qwen3-Reranker-0.6B:F16")
|
||||||
|
mcpSrv = mcpSrv.WithReranker(reranker.New(rerankURL, rerankModel))
|
||||||
|
logger.Info("brain reranker configured", "url", rerankURL, "model", rerankModel)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Hybrid retrieval (pgvector + nomic-embed-text). Both env vars must
|
||||||
|
// be set together for the path to wire on; otherwise BM25-only.
|
||||||
|
var vectorStore *vectorstore.PGStore
|
||||||
|
pgDSN := os.Getenv("BRAIN_PG_DSN")
|
||||||
|
embedURL := os.Getenv("BRAIN_EMBED_URL")
|
||||||
|
switch {
|
||||||
|
case pgDSN != "" && embedURL != "":
|
||||||
|
embedModel := envOr("BRAIN_EMBED_MODEL", "nomic-embed-text:latest")
|
||||||
|
store, err := vectorstore.New(context.Background(), pgDSN)
|
||||||
|
if err != nil {
|
||||||
|
logger.Error("vector store init", "err", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
if err := store.Init(context.Background()); err != nil {
|
||||||
|
logger.Error("vector store migrate", "err", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
vectorStore = store
|
||||||
|
embedder := embed.New(embedURL, embedModel)
|
||||||
|
mcpSrv = mcpSrv.WithHybridRetrieval(vectorAdapter{s: store}, embedder)
|
||||||
|
h.WithEmbedSync(store, embedder)
|
||||||
|
logger.Info("brain hybrid retrieval enabled",
|
||||||
|
"pg", redactDSN(pgDSN),
|
||||||
|
"embed_url", embedURL, "embed_model", embedModel)
|
||||||
|
case pgDSN == "" && embedURL == "":
|
||||||
|
// disabled — fine
|
||||||
|
default:
|
||||||
|
logger.Error("BRAIN_PG_DSN and BRAIN_EMBED_URL must be set together")
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
mcpToken := os.Getenv("BRAIN_MCP_TOKEN")
|
mcpToken := os.Getenv("BRAIN_MCP_TOKEN")
|
||||||
if mcpToken == "" {
|
if mcpToken == "" {
|
||||||
@@ -71,16 +161,79 @@ func main() {
|
|||||||
Pipeline: pipelineCfg,
|
Pipeline: pipelineCfg,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
if vectorStore != nil {
|
||||||
|
embedSyncInterval := envInt("BRAIN_EMBED_SYNC_INTERVAL", 300)
|
||||||
|
vectorstore.StartSync(ctx, brainDir, vectorStore,
|
||||||
|
embed.New(os.Getenv("BRAIN_EMBED_URL"),
|
||||||
|
envOr("BRAIN_EMBED_MODEL", "nomic-embed-text:latest")),
|
||||||
|
time.Duration(embedSyncInterval)*time.Second)
|
||||||
|
logger.Info("embed sync started", "interval_s", embedSyncInterval)
|
||||||
|
}
|
||||||
|
|
||||||
mux := http.NewServeMux()
|
mux := http.NewServeMux()
|
||||||
mux.HandleFunc("POST /query", h.Query)
|
mux.HandleFunc("POST /query", h.Query)
|
||||||
mux.HandleFunc("POST /write", h.Write)
|
mux.HandleFunc("POST /write", h.Write)
|
||||||
|
mux.HandleFunc("POST /index", h.Index)
|
||||||
mux.HandleFunc("POST /ingest", h.Ingest)
|
mux.HandleFunc("POST /ingest", h.Ingest)
|
||||||
mux.HandleFunc("POST /ingest-path", h.IngestPath)
|
mux.HandleFunc("POST /ingest-path", h.IngestPath)
|
||||||
mux.HandleFunc("POST /ingest-raw", h.IngestRaw)
|
mux.HandleFunc("POST /ingest-raw", h.IngestRaw)
|
||||||
mux.HandleFunc("POST /backfill-refs", h.BackfillRefs)
|
mux.HandleFunc("POST /backfill-refs", h.BackfillRefs)
|
||||||
|
mux.HandleFunc("POST /backfill-embeddings", h.BackfillEmbeddings)
|
||||||
mux.HandleFunc("GET /pass-rate", h.PassRate)
|
mux.HandleFunc("GET /pass-rate", h.PassRate)
|
||||||
mux.Handle("/mcp", mcp.BearerAuth(mcpToken, mcpSrv))
|
var jwtValidator *auth.Validator
|
||||||
|
if dexURL := os.Getenv("DEX_ISSUER_URL"); dexURL != "" {
|
||||||
|
audience := os.Getenv("MCP_AUDIENCE")
|
||||||
|
v, err := auth.NewValidator(dexURL, audience)
|
||||||
|
if err != nil {
|
||||||
|
logger.Error("build jwt validator", "err", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
jwtValidator = v
|
||||||
|
logger.Info("jwt auth enabled", "issuer", dexURL)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Resource-metadata URL is only emitted on 401 when Dex OAuth is
|
||||||
|
// configured. Static-Bearer-only deployments leave this empty so
|
||||||
|
// clients never see an OAuth challenge.
|
||||||
|
var resourceMetadataURL string
|
||||||
|
if dexURL := os.Getenv("DEX_ISSUER_URL"); dexURL != "" {
|
||||||
|
resourceURL := os.Getenv("MCP_RESOURCE_URL")
|
||||||
|
mux.HandleFunc("GET /.well-known/oauth-protected-resource",
|
||||||
|
auth.ProtectedResourceHandler(resourceURL, dexURL))
|
||||||
|
if resourceURL != "" {
|
||||||
|
resourceMetadataURL = strings.TrimRight(resourceURL, "/") + "/.well-known/oauth-protected-resource"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
mux.Handle("/mcp", mcp.BearerAuth(mcpToken, jwtValidator, resourceMetadataURL, mcpSrv))
|
||||||
|
|
||||||
|
// Opt-in OAuth 2.0 client_credentials flow for claude.ai's custom-MCP
|
||||||
|
// integration UI, which has no static-Bearer field. Setting both
|
||||||
|
// OAUTH_CLIENT_ID and OAUTH_CLIENT_SECRET enables the token exchange;
|
||||||
|
// setting only one is misconfiguration → fail fast.
|
||||||
|
oauthID := os.Getenv("OAUTH_CLIENT_ID")
|
||||||
|
oauthSecret := os.Getenv("OAUTH_CLIENT_SECRET")
|
||||||
|
switch {
|
||||||
|
case oauthID != "" && oauthSecret != "":
|
||||||
|
issuer := os.Getenv("MCP_RESOURCE_URL")
|
||||||
|
if issuer == "" {
|
||||||
|
logger.Error("OAUTH_CLIENT_ID/SECRET set but MCP_RESOURCE_URL is empty; cannot derive issuer")
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
mux.HandleFunc("GET /.well-known/oauth-authorization-server",
|
||||||
|
oauth.MetadataHandler(issuer))
|
||||||
|
mux.HandleFunc("POST /oauth/token", oauth.TokenHandler(oauth.TokenConfig{
|
||||||
|
ClientID: oauthID,
|
||||||
|
ClientSecret: oauthSecret,
|
||||||
|
AccessToken: mcpToken,
|
||||||
|
}))
|
||||||
|
logger.Info("oauth client_credentials enabled", "issuer", strings.TrimRight(issuer, "/"))
|
||||||
|
case oauthID == "" && oauthSecret == "":
|
||||||
|
// disabled — that's fine
|
||||||
|
default:
|
||||||
|
logger.Error("OAUTH_CLIENT_ID and OAUTH_CLIENT_SECRET must be set together")
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
addr := ":" + port
|
addr := ":" + port
|
||||||
watchIntervalLog := "disabled"
|
watchIntervalLog := "disabled"
|
||||||
|
|||||||
@@ -2,10 +2,29 @@ module github.com/mathiasbq/hyperguild/ingestion
|
|||||||
|
|
||||||
go 1.26.1
|
go 1.26.1
|
||||||
|
|
||||||
require github.com/stretchr/testify v1.11.1
|
require (
|
||||||
|
github.com/lestrrat-go/jwx/v2 v2.1.6
|
||||||
|
github.com/stretchr/testify v1.11.1
|
||||||
|
)
|
||||||
|
|
||||||
require (
|
require (
|
||||||
github.com/davecgh/go-spew v1.1.1 // indirect
|
github.com/davecgh/go-spew v1.1.1 // indirect
|
||||||
|
github.com/decred/dcrd/dcrec/secp256k1/v4 v4.4.0 // indirect
|
||||||
|
github.com/goccy/go-json v0.10.3 // indirect
|
||||||
|
github.com/jackc/pgpassfile v1.0.0 // indirect
|
||||||
|
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect
|
||||||
|
github.com/jackc/pgx/v5 v5.9.2 // indirect
|
||||||
|
github.com/jackc/puddle/v2 v2.2.2 // indirect
|
||||||
|
github.com/lestrrat-go/blackmagic v1.0.3 // indirect
|
||||||
|
github.com/lestrrat-go/httpcc v1.0.1 // indirect
|
||||||
|
github.com/lestrrat-go/httprc v1.0.6 // indirect
|
||||||
|
github.com/lestrrat-go/iter v1.0.2 // indirect
|
||||||
|
github.com/lestrrat-go/option v1.0.1 // indirect
|
||||||
github.com/pmezard/go-difflib v1.0.0 // indirect
|
github.com/pmezard/go-difflib v1.0.0 // indirect
|
||||||
|
github.com/segmentio/asm v1.2.0 // indirect
|
||||||
|
golang.org/x/crypto v0.32.0 // indirect
|
||||||
|
golang.org/x/sync v0.17.0 // indirect
|
||||||
|
golang.org/x/sys v0.31.0 // indirect
|
||||||
|
golang.org/x/text v0.29.0 // indirect
|
||||||
gopkg.in/yaml.v3 v3.0.1 // indirect
|
gopkg.in/yaml.v3 v3.0.1 // indirect
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -1,9 +1,52 @@
|
|||||||
|
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
|
github.com/decred/dcrd/dcrec/secp256k1/v4 v4.4.0 h1:NMZiJj8QnKe1LgsbDayM4UoHwbvwDRwnI3hwNaAHRnc=
|
||||||
|
github.com/decred/dcrd/dcrec/secp256k1/v4 v4.4.0/go.mod h1:ZXNYxsqcloTdSy/rNShjYzMhyjf0LaoftYK0p+A3h40=
|
||||||
|
github.com/goccy/go-json v0.10.3 h1:KZ5WoDbxAIgm2HNbYckL0se1fHD6rz5j4ywS6ebzDqA=
|
||||||
|
github.com/goccy/go-json v0.10.3/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M=
|
||||||
|
github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM=
|
||||||
|
github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg=
|
||||||
|
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 h1:iCEnooe7UlwOQYpKFhBabPMi4aNAfoODPEFNiAnClxo=
|
||||||
|
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761/go.mod h1:5TJZWKEWniPve33vlWYSoGYefn3gLQRzjfDlhSJ9ZKM=
|
||||||
|
github.com/jackc/pgx/v5 v5.9.2 h1:3ZhOzMWnR4yJ+RW1XImIPsD1aNSz4T4fyP7zlQb56hw=
|
||||||
|
github.com/jackc/pgx/v5 v5.9.2/go.mod h1:mal1tBGAFfLHvZzaYh77YS/eC6IX9OWbRV1QIIM0Jn4=
|
||||||
|
github.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo=
|
||||||
|
github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4=
|
||||||
|
github.com/lestrrat-go/blackmagic v1.0.3 h1:94HXkVLxkZO9vJI/w2u1T0DAoprShFd13xtnSINtDWs=
|
||||||
|
github.com/lestrrat-go/blackmagic v1.0.3/go.mod h1:6AWFyKNNj0zEXQYfTMPfZrAXUWUfTIZ5ECEUEJaijtw=
|
||||||
|
github.com/lestrrat-go/httpcc v1.0.1 h1:ydWCStUeJLkpYyjLDHihupbn2tYmZ7m22BGkcvZZrIE=
|
||||||
|
github.com/lestrrat-go/httpcc v1.0.1/go.mod h1:qiltp3Mt56+55GPVCbTdM9MlqhvzyuL6W/NMDA8vA5E=
|
||||||
|
github.com/lestrrat-go/httprc v1.0.6 h1:qgmgIRhpvBqexMJjA/PmwSvhNk679oqD1RbovdCGW8k=
|
||||||
|
github.com/lestrrat-go/httprc v1.0.6/go.mod h1:mwwz3JMTPBjHUkkDv/IGJ39aALInZLrhBp0X7KGUZlo=
|
||||||
|
github.com/lestrrat-go/iter v1.0.2 h1:gMXo1q4c2pHmC3dn8LzRhJfP1ceCbgSiT9lUydIzltI=
|
||||||
|
github.com/lestrrat-go/iter v1.0.2/go.mod h1:Momfcq3AnRlRjI5b5O8/G5/BvpzrhoFTZcn06fEOPt4=
|
||||||
|
github.com/lestrrat-go/jwx/v2 v2.1.6 h1:hxM1gfDILk/l5ylers6BX/Eq1m/pnxe9NBwW6lVfecA=
|
||||||
|
github.com/lestrrat-go/jwx/v2 v2.1.6/go.mod h1:Y722kU5r/8mV7fYDifjug0r8FK8mZdw0K0GpJw/l8pU=
|
||||||
|
github.com/lestrrat-go/option v1.0.1 h1:oAzP2fvZGQKWkvHa1/SAcFolBEca1oN+mQ7eooNBEYU=
|
||||||
|
github.com/lestrrat-go/option v1.0.1/go.mod h1:5ZHFbivi4xwXxhxY9XHDe2FHo6/Z7WWmtT7T5nBBp3I=
|
||||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||||
|
github.com/segmentio/asm v1.2.0 h1:9BQrFxC+YOHJlTlHGkTrFWf59nbL3XnCoFLTwDCI7ys=
|
||||||
|
github.com/segmentio/asm v1.2.0/go.mod h1:BqMnlJP91P8d+4ibuonYZw9mfnzI9HfxselHZr5aAcs=
|
||||||
|
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||||
|
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
||||||
|
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||||
|
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||||
|
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||||
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
|
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
|
||||||
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
|
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
|
||||||
|
golang.org/x/crypto v0.32.0 h1:euUpcYgM8WcP71gNpTqQCn6rC2t6ULUPiOzfWaXVVfc=
|
||||||
|
golang.org/x/crypto v0.32.0/go.mod h1:ZnnJkOaASj8g0AjIduWNlq2NRxL0PlBrbKVyZ6V/Ugc=
|
||||||
|
golang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug=
|
||||||
|
golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
|
||||||
|
golang.org/x/sys v0.31.0 h1:ioabZlmFYtWhL+TRYpcnNlLwhyxaM9kWTDEmfnprqik=
|
||||||
|
golang.org/x/sys v0.31.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
|
||||||
|
golang.org/x/text v0.29.0 h1:1neNs90w9YzJ9BocxfsQNHKuAT4pkghyXc4nhZ6sJvk=
|
||||||
|
golang.org/x/text v0.29.0/go.mod h1:7MhJOA9CD2qZyOKYazxdYMF85OwPdEr9jTtBpO7ydH4=
|
||||||
|
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
||||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||||
|
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
|
||||||
|
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||||
|
|||||||
@@ -11,9 +11,11 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/brain"
|
||||||
"github.com/mathiasbq/hyperguild/ingestion/internal/extract"
|
"github.com/mathiasbq/hyperguild/ingestion/internal/extract"
|
||||||
"github.com/mathiasbq/hyperguild/ingestion/internal/pipeline"
|
"github.com/mathiasbq/hyperguild/ingestion/internal/pipeline"
|
||||||
"github.com/mathiasbq/hyperguild/ingestion/internal/search"
|
"github.com/mathiasbq/hyperguild/ingestion/internal/search"
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/vectorstore"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Handler serves the ingestion HTTP API.
|
// Handler serves the ingestion HTTP API.
|
||||||
@@ -21,6 +23,8 @@ type Handler struct {
|
|||||||
brainDir string
|
brainDir string
|
||||||
logger *slog.Logger
|
logger *slog.Logger
|
||||||
pipeline pipeline.Config
|
pipeline pipeline.Config
|
||||||
|
embedStore vectorstore.Store
|
||||||
|
embedClient vectorstore.Embedder
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewHandler constructs a Handler. brainDir is the absolute path to brain/.
|
// NewHandler constructs a Handler. brainDir is the absolute path to brain/.
|
||||||
@@ -31,9 +35,19 @@ func NewHandler(brainDir string, logger *slog.Logger, pipelineCfg pipeline.Confi
|
|||||||
return &Handler{brainDir: brainDir, logger: logger, pipeline: pipelineCfg}
|
return &Handler{brainDir: brainDir, logger: logger, pipeline: pipelineCfg}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// WithEmbedSync wires the optional vector store + embedder used by the
|
||||||
|
// POST /backfill-embeddings endpoint. Calling with either nil is a no-op.
|
||||||
|
func (h *Handler) WithEmbedSync(store vectorstore.Store, embedder vectorstore.Embedder) *Handler {
|
||||||
|
h.embedStore = store
|
||||||
|
h.embedClient = embedder
|
||||||
|
return h
|
||||||
|
}
|
||||||
|
|
||||||
type queryRequest struct {
|
type queryRequest struct {
|
||||||
Query string `json:"query"`
|
Query string `json:"query"`
|
||||||
Limit int `json:"limit,omitempty"`
|
Limit int `json:"limit,omitempty"`
|
||||||
|
Wing string `json:"wing,omitempty"`
|
||||||
|
Hall string `json:"hall,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type writeRequest struct {
|
type writeRequest struct {
|
||||||
@@ -41,6 +55,8 @@ type writeRequest struct {
|
|||||||
Filename string `json:"filename,omitempty"`
|
Filename string `json:"filename,omitempty"`
|
||||||
Type string `json:"type,omitempty"`
|
Type string `json:"type,omitempty"`
|
||||||
Domain string `json:"domain,omitempty"`
|
Domain string `json:"domain,omitempty"`
|
||||||
|
Wing string `json:"wing,omitempty"`
|
||||||
|
Hall string `json:"hall,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type ingestRequest struct {
|
type ingestRequest struct {
|
||||||
@@ -75,7 +91,12 @@ func (h *Handler) Query(w http.ResponseWriter, r *http.Request) {
|
|||||||
req.Limit = 5
|
req.Limit = 5
|
||||||
}
|
}
|
||||||
|
|
||||||
results, err := search.Query(h.brainDir, req.Query, req.Limit)
|
results, err := search.Query(h.brainDir, search.QueryOptions{
|
||||||
|
Query: req.Query,
|
||||||
|
Limit: req.Limit,
|
||||||
|
Wing: req.Wing,
|
||||||
|
Hall: req.Hall,
|
||||||
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
h.logger.Error("query failed", "err", err)
|
h.logger.Error("query failed", "err", err)
|
||||||
writeError(w, http.StatusInternalServerError, "search error")
|
writeError(w, http.StatusInternalServerError, "search error")
|
||||||
@@ -85,13 +106,78 @@ func (h *Handler) Query(w http.ResponseWriter, r *http.Request) {
|
|||||||
writeJSON(w, map[string]any{"results": results})
|
writeJSON(w, map[string]any{"results": results})
|
||||||
}
|
}
|
||||||
|
|
||||||
// WriteNote writes a markdown file to brainDir/knowledge/<filename>, optionally
|
// WriteNoteOptions configures how a brain note is written.
|
||||||
// prefixed with YAML frontmatter built from typ and domain. Returns the path
|
//
|
||||||
|
// When both Wing and Hall are non-empty, the note routes into the
|
||||||
|
// structured wiki at brain/wiki/<wing>/<hall>/<slug>.md and gets
|
||||||
|
// wing/hall/created_at injected into its YAML frontmatter.
|
||||||
|
//
|
||||||
|
// When either is empty, the note falls back to brain/knowledge/<filename>
|
||||||
|
// with optional type/domain frontmatter (legacy behaviour).
|
||||||
|
type WriteNoteOptions struct {
|
||||||
|
Content string
|
||||||
|
Filename string
|
||||||
|
Type string
|
||||||
|
Domain string
|
||||||
|
Wing string
|
||||||
|
Hall string
|
||||||
|
}
|
||||||
|
|
||||||
|
// WriteNote writes a markdown note into the brain. Returns the path
|
||||||
// relative to brainDir (forward-slashed). Filename traversal is rejected.
|
// relative to brainDir (forward-slashed). Filename traversal is rejected.
|
||||||
func WriteNote(brainDir, content, filename, typ, domain string) (string, error) {
|
func WriteNote(brainDir string, opts WriteNoteOptions) (string, error) {
|
||||||
if content == "" {
|
if opts.Content == "" {
|
||||||
return "", fmt.Errorf("content is required")
|
return "", fmt.Errorf("content is required")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if opts.Wing != "" && opts.Hall != "" {
|
||||||
|
return writeHallNote(brainDir, opts)
|
||||||
|
}
|
||||||
|
if opts.Wing != "" || opts.Hall != "" {
|
||||||
|
return "", fmt.Errorf("wing and hall must be set together")
|
||||||
|
}
|
||||||
|
return writeLegacyNote(brainDir, opts)
|
||||||
|
}
|
||||||
|
|
||||||
|
// writeHallNote routes a note into brain/wiki/<wing>/<hall>/ and injects
|
||||||
|
// wing/hall/created_at frontmatter.
|
||||||
|
func writeHallNote(brainDir string, opts WriteNoteOptions) (string, error) {
|
||||||
|
slug := opts.Filename
|
||||||
|
if slug == "" {
|
||||||
|
slug = time.Now().UTC().Format("2006-01-02-150405") + "-auto"
|
||||||
|
}
|
||||||
|
dest, err := brain.NotePath(brainDir, opts.Wing, opts.Hall, slug)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
if err := os.MkdirAll(filepath.Dir(dest), 0o755); err != nil {
|
||||||
|
return "", fmt.Errorf("create hall dir: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
var fm strings.Builder
|
||||||
|
fm.WriteString("---\n")
|
||||||
|
fmt.Fprintf(&fm, "wing: %s\n", brain.Sanitise(opts.Wing))
|
||||||
|
fmt.Fprintf(&fm, "hall: %s\n", opts.Hall)
|
||||||
|
fmt.Fprintf(&fm, "created_at: %s\n", time.Now().UTC().Format(time.RFC3339))
|
||||||
|
if opts.Type != "" {
|
||||||
|
fmt.Fprintf(&fm, "type: %s\n", opts.Type)
|
||||||
|
}
|
||||||
|
if opts.Domain != "" {
|
||||||
|
fmt.Fprintf(&fm, "domain: %s\n", opts.Domain)
|
||||||
|
}
|
||||||
|
fm.WriteString("---\n")
|
||||||
|
|
||||||
|
if err := os.WriteFile(dest, []byte(fm.String()+opts.Content), 0o644); err != nil {
|
||||||
|
return "", fmt.Errorf("write: %w", err)
|
||||||
|
}
|
||||||
|
rel, _ := filepath.Rel(brainDir, dest)
|
||||||
|
return filepath.ToSlash(rel), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// writeLegacyNote preserves the original brain/knowledge/ behaviour for
|
||||||
|
// callers that have not adopted the wing/hall taxonomy.
|
||||||
|
func writeLegacyNote(brainDir string, opts WriteNoteOptions) (string, error) {
|
||||||
|
filename := opts.Filename
|
||||||
if filename == "" {
|
if filename == "" {
|
||||||
filename = fmt.Sprintf("%s-auto.md", time.Now().UTC().Format("2006-01-02-150405"))
|
filename = fmt.Sprintf("%s-auto.md", time.Now().UTC().Format("2006-01-02-150405"))
|
||||||
}
|
}
|
||||||
@@ -101,26 +187,24 @@ func WriteNote(brainDir, content, filename, typ, domain string) (string, error)
|
|||||||
return "", fmt.Errorf("create raw dir: %w", err)
|
return "", fmt.Errorf("create raw dir: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
finalContent := content
|
finalContent := opts.Content
|
||||||
if typ != "" || domain != "" {
|
if opts.Type != "" || opts.Domain != "" {
|
||||||
var fm strings.Builder
|
var fm strings.Builder
|
||||||
fm.WriteString("---\n")
|
fm.WriteString("---\n")
|
||||||
if typ != "" {
|
if opts.Type != "" {
|
||||||
fmt.Fprintf(&fm, "type: %s\n", typ)
|
fmt.Fprintf(&fm, "type: %s\n", opts.Type)
|
||||||
}
|
}
|
||||||
if domain != "" {
|
if opts.Domain != "" {
|
||||||
fmt.Fprintf(&fm, "domain: %s\n", domain)
|
fmt.Fprintf(&fm, "domain: %s\n", opts.Domain)
|
||||||
}
|
}
|
||||||
fm.WriteString("---\n")
|
fm.WriteString("---\n")
|
||||||
finalContent = fm.String() + content
|
finalContent = fm.String() + opts.Content
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reject path separators outright; any non-flat filename is misuse.
|
|
||||||
if strings.ContainsAny(filename, `/\`) {
|
if strings.ContainsAny(filename, `/\`) {
|
||||||
return "", fmt.Errorf("invalid filename")
|
return "", fmt.Errorf("invalid filename")
|
||||||
}
|
}
|
||||||
base := filepath.Base(filename)
|
base := filepath.Base(filename)
|
||||||
// After Base, "." and ".." remain. Reject those before adding .md.
|
|
||||||
if base == "." || base == ".." || base == "" {
|
if base == "." || base == ".." || base == "" {
|
||||||
return "", fmt.Errorf("invalid filename")
|
return "", fmt.Errorf("invalid filename")
|
||||||
}
|
}
|
||||||
@@ -143,15 +227,77 @@ func (h *Handler) Write(w http.ResponseWriter, r *http.Request) {
|
|||||||
writeError(w, http.StatusBadRequest, "invalid JSON")
|
writeError(w, http.StatusBadRequest, "invalid JSON")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
relPath, err := WriteNote(h.brainDir, req.Content, req.Filename, req.Type, req.Domain)
|
relPath, err := WriteNote(h.brainDir, WriteNoteOptions(req))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
h.logger.Error("write failed", "err", err)
|
h.logger.Error("write failed", "err", err)
|
||||||
writeError(w, http.StatusBadRequest, err.Error())
|
writeError(w, http.StatusBadRequest, err.Error())
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
if req.Wing != "" && req.Hall != "" {
|
||||||
|
if err := brain.BuildWingIndex(h.brainDir, req.Wing); err != nil {
|
||||||
|
h.logger.Warn("auto-index failed", "wing", req.Wing, "err", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
writeJSON(w, map[string]string{"path": relPath})
|
writeJSON(w, map[string]string{"path": relPath})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// BackfillEmbeddings handles POST /backfill-embeddings — synchronously
|
||||||
|
// embeds every note under brain/wiki/ that's not yet in the vector
|
||||||
|
// store, and deletes rows for files no longer on disk.
|
||||||
|
func (h *Handler) BackfillEmbeddings(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if h.embedStore == nil || h.embedClient == nil {
|
||||||
|
writeError(w, http.StatusServiceUnavailable,
|
||||||
|
"embeddings not configured (set BRAIN_PG_DSN and BRAIN_EMBED_URL)")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
res, err := vectorstore.Sync(r.Context(), h.brainDir, h.embedStore, h.embedClient)
|
||||||
|
if err != nil {
|
||||||
|
h.logger.Error("backfill failed", "err", err)
|
||||||
|
writeError(w, http.StatusInternalServerError, "backfill error")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
errStrs := make([]string, 0, len(res.Errors))
|
||||||
|
for _, e := range res.Errors {
|
||||||
|
errStrs = append(errStrs, e.Error())
|
||||||
|
}
|
||||||
|
writeJSON(w, map[string]any{
|
||||||
|
"added": res.Added,
|
||||||
|
"deleted": res.Deleted,
|
||||||
|
"errors": errStrs,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
type indexRequest struct {
|
||||||
|
Wing string `json:"wing,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// Index handles POST /index — regenerate the _index.md MOC for one wing
|
||||||
|
// (when "wing" is set) or for every wing (when omitted).
|
||||||
|
func (h *Handler) Index(w http.ResponseWriter, r *http.Request) {
|
||||||
|
var req indexRequest
|
||||||
|
if r.ContentLength > 0 {
|
||||||
|
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||||||
|
writeError(w, http.StatusBadRequest, "invalid JSON")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if req.Wing == "" {
|
||||||
|
if err := brain.BuildAllWingIndexes(h.brainDir); err != nil {
|
||||||
|
h.logger.Error("index all failed", "err", err)
|
||||||
|
writeError(w, http.StatusInternalServerError, "index error")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
writeJSON(w, map[string]any{"status": "ok", "scope": "all"})
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if err := brain.BuildWingIndex(h.brainDir, req.Wing); err != nil {
|
||||||
|
h.logger.Error("index failed", "wing", req.Wing, "err", err)
|
||||||
|
writeError(w, http.StatusBadRequest, err.Error())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
writeJSON(w, map[string]any{"status": "ok", "scope": req.Wing})
|
||||||
|
}
|
||||||
|
|
||||||
// Ingest handles POST /ingest — run the pipeline on provided content.
|
// Ingest handles POST /ingest — run the pipeline on provided content.
|
||||||
func (h *Handler) Ingest(w http.ResponseWriter, r *http.Request) {
|
func (h *Handler) Ingest(w http.ResponseWriter, r *http.Request) {
|
||||||
var req ingestRequest
|
var req ingestRequest
|
||||||
|
|||||||
84
ingestion/internal/auth/jwt.go
Normal file
84
ingestion/internal/auth/jwt.go
Normal file
@@ -0,0 +1,84 @@
|
|||||||
|
package auth
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"net/http"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/lestrrat-go/jwx/v2/jwk"
|
||||||
|
"github.com/lestrrat-go/jwx/v2/jwt"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Validator validates Bearer JWTs issued by a Dex (OIDC) authorization server.
|
||||||
|
// Audience is optional; leave empty to skip audience validation.
|
||||||
|
type Validator struct {
|
||||||
|
issuer string
|
||||||
|
audience string
|
||||||
|
jwksURI string
|
||||||
|
cache *jwk.Cache
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewValidator fetches the OIDC discovery document from issuerURL, extracts
|
||||||
|
// jwks_uri, seeds the JWKS cache, and returns a ready Validator.
|
||||||
|
// If DEX_ISSUER_URL is not set the caller should pass "" and skip construction.
|
||||||
|
func NewValidator(issuerURL, audience string) (*Validator, error) {
|
||||||
|
resp, err := http.Get(issuerURL + "/.well-known/openid-configuration") //nolint:noctx
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("fetch oidc discovery: %w", err)
|
||||||
|
}
|
||||||
|
defer resp.Body.Close() //nolint:errcheck
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
return nil, fmt.Errorf("oidc discovery: status %d", resp.StatusCode)
|
||||||
|
}
|
||||||
|
|
||||||
|
var doc struct {
|
||||||
|
JWKSURI string `json:"jwks_uri"`
|
||||||
|
}
|
||||||
|
if err := json.NewDecoder(resp.Body).Decode(&doc); err != nil {
|
||||||
|
return nil, fmt.Errorf("decode oidc discovery: %w", err)
|
||||||
|
}
|
||||||
|
if doc.JWKSURI == "" {
|
||||||
|
return nil, fmt.Errorf("oidc discovery: empty jwks_uri")
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx := context.Background()
|
||||||
|
cache := jwk.NewCache(ctx)
|
||||||
|
if err := cache.Register(doc.JWKSURI, jwk.WithMinRefreshInterval(time.Hour)); err != nil {
|
||||||
|
return nil, fmt.Errorf("register jwks cache: %w", err)
|
||||||
|
}
|
||||||
|
if _, err := cache.Refresh(ctx, doc.JWKSURI); err != nil {
|
||||||
|
return nil, fmt.Errorf("initial jwks fetch: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return &Validator{
|
||||||
|
issuer: issuerURL,
|
||||||
|
audience: audience,
|
||||||
|
jwksURI: doc.JWKSURI,
|
||||||
|
cache: cache,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Validate parses and validates rawToken. Returns the subject claim on success.
|
||||||
|
func (v *Validator) Validate(ctx context.Context, rawToken string) (string, error) {
|
||||||
|
keySet, err := v.cache.Get(ctx, v.jwksURI)
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("get jwks: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
opts := []jwt.ParseOption{
|
||||||
|
jwt.WithKeySet(keySet),
|
||||||
|
jwt.WithValidate(true),
|
||||||
|
jwt.WithIssuer(v.issuer),
|
||||||
|
}
|
||||||
|
if v.audience != "" {
|
||||||
|
opts = append(opts, jwt.WithAudience(v.audience))
|
||||||
|
}
|
||||||
|
|
||||||
|
tok, err := jwt.ParseString(rawToken, opts...)
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("validate jwt: %w", err)
|
||||||
|
}
|
||||||
|
return tok.Subject(), nil
|
||||||
|
}
|
||||||
169
ingestion/internal/auth/jwt_test.go
Normal file
169
ingestion/internal/auth/jwt_test.go
Normal file
@@ -0,0 +1,169 @@
|
|||||||
|
package auth_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"crypto/rand"
|
||||||
|
"crypto/rsa"
|
||||||
|
"encoding/json"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/lestrrat-go/jwx/v2/jwa"
|
||||||
|
"github.com/lestrrat-go/jwx/v2/jwk"
|
||||||
|
"github.com/lestrrat-go/jwx/v2/jwt"
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/auth"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
type testKeys struct {
|
||||||
|
priv jwk.Key
|
||||||
|
pub jwk.Key
|
||||||
|
}
|
||||||
|
|
||||||
|
func generateRSAKeys(t *testing.T) testKeys {
|
||||||
|
t.Helper()
|
||||||
|
raw, err := rsa.GenerateKey(rand.Reader, 2048)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
priv, err := jwk.FromRaw(raw)
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.NoError(t, priv.Set(jwk.KeyIDKey, "test-kid"))
|
||||||
|
require.NoError(t, priv.Set(jwk.AlgorithmKey, jwa.RS256))
|
||||||
|
|
||||||
|
pub, err := jwk.PublicKeyOf(priv)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
return testKeys{priv: priv, pub: pub}
|
||||||
|
}
|
||||||
|
|
||||||
|
func mockOIDCServer(t *testing.T, keys testKeys) *httptest.Server {
|
||||||
|
t.Helper()
|
||||||
|
set := jwk.NewSet()
|
||||||
|
require.NoError(t, set.AddKey(keys.pub))
|
||||||
|
jwksBytes, err := json.Marshal(set)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
mux := http.NewServeMux()
|
||||||
|
var srv *httptest.Server
|
||||||
|
mux.HandleFunc("/.well-known/openid-configuration", func(w http.ResponseWriter, _ *http.Request) {
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
_ = json.NewEncoder(w).Encode(map[string]string{
|
||||||
|
"issuer": srv.URL,
|
||||||
|
"jwks_uri": srv.URL + "/jwks",
|
||||||
|
})
|
||||||
|
})
|
||||||
|
mux.HandleFunc("/jwks", func(w http.ResponseWriter, _ *http.Request) {
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
_, _ = w.Write(jwksBytes)
|
||||||
|
})
|
||||||
|
srv = httptest.NewServer(mux)
|
||||||
|
t.Cleanup(srv.Close)
|
||||||
|
return srv
|
||||||
|
}
|
||||||
|
|
||||||
|
func signToken(t *testing.T, keys testKeys, issuer, audience, subject string, exp time.Time) string {
|
||||||
|
t.Helper()
|
||||||
|
b := jwt.NewBuilder().
|
||||||
|
Issuer(issuer).
|
||||||
|
Subject(subject).
|
||||||
|
Expiration(exp)
|
||||||
|
if audience != "" {
|
||||||
|
b = b.Audience([]string{audience})
|
||||||
|
}
|
||||||
|
tok, err := b.Build()
|
||||||
|
require.NoError(t, err)
|
||||||
|
signed, err := jwt.Sign(tok, jwt.WithKey(jwa.RS256, keys.priv))
|
||||||
|
require.NoError(t, err)
|
||||||
|
return string(signed)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestValidator(t *testing.T) {
|
||||||
|
keys := generateRSAKeys(t)
|
||||||
|
srv := mockOIDCServer(t, keys)
|
||||||
|
ctx := context.Background()
|
||||||
|
|
||||||
|
v, err := auth.NewValidator(srv.URL, "brain")
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
token string
|
||||||
|
wantSub string
|
||||||
|
wantErr bool
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "valid jwt",
|
||||||
|
token: signToken(t, keys, srv.URL, "brain", "test-user", time.Now().Add(time.Hour)),
|
||||||
|
wantSub: "test-user",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "expired jwt",
|
||||||
|
token: signToken(t, keys, srv.URL, "brain", "test-user", time.Now().Add(-time.Hour)),
|
||||||
|
wantErr: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "wrong issuer",
|
||||||
|
token: signToken(t, keys, "https://evil.example.com", "brain", "test-user", time.Now().Add(time.Hour)),
|
||||||
|
wantErr: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "wrong audience",
|
||||||
|
token: signToken(t, keys, srv.URL, "other-service", "test-user", time.Now().Add(time.Hour)),
|
||||||
|
wantErr: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "tampered token",
|
||||||
|
token: signToken(t, keys, srv.URL, "brain", "test-user", time.Now().Add(time.Hour)) + "tampered",
|
||||||
|
wantErr: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "not a jwt",
|
||||||
|
token: "not-a-jwt",
|
||||||
|
wantErr: true,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tc := range tests {
|
||||||
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
|
sub, err := v.Validate(ctx, tc.token)
|
||||||
|
if tc.wantErr {
|
||||||
|
assert.Error(t, err)
|
||||||
|
assert.Empty(t, sub)
|
||||||
|
} else {
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, tc.wantSub, sub)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNewValidator_NoAudience(t *testing.T) {
|
||||||
|
keys := generateRSAKeys(t)
|
||||||
|
srv := mockOIDCServer(t, keys)
|
||||||
|
ctx := context.Background()
|
||||||
|
|
||||||
|
v, err := auth.NewValidator(srv.URL, "")
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
// Token without audience passes when audience validation is disabled.
|
||||||
|
tok, err := jwt.NewBuilder().
|
||||||
|
Issuer(srv.URL).
|
||||||
|
Subject("sub").
|
||||||
|
Expiration(time.Now().Add(time.Hour)).
|
||||||
|
Build()
|
||||||
|
require.NoError(t, err)
|
||||||
|
signed, err := jwt.Sign(tok, jwt.WithKey(jwa.RS256, keys.priv))
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
sub, err := v.Validate(ctx, string(signed))
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, "sub", sub)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNewValidator_BadDiscoveryURL(t *testing.T) {
|
||||||
|
_, err := auth.NewValidator("http://127.0.0.1:1", "brain")
|
||||||
|
assert.Error(t, err)
|
||||||
|
}
|
||||||
23
ingestion/internal/auth/protected_resource.go
Normal file
23
ingestion/internal/auth/protected_resource.go
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
package auth
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"net/http"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ProtectedResourceHandler returns an RFC 9728 oauth-protected-resource metadata
|
||||||
|
// handler. Mount at GET /.well-known/oauth-protected-resource (no auth required).
|
||||||
|
func ProtectedResourceHandler(resourceURL, issuerURL string) http.HandlerFunc {
|
||||||
|
type metadata struct {
|
||||||
|
Resource string `json:"resource"`
|
||||||
|
AuthorizationServers []string `json:"authorization_servers"`
|
||||||
|
}
|
||||||
|
body, _ := json.Marshal(metadata{
|
||||||
|
Resource: resourceURL,
|
||||||
|
AuthorizationServers: []string{issuerURL},
|
||||||
|
})
|
||||||
|
return func(w http.ResponseWriter, _ *http.Request) {
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
_, _ = w.Write(body)
|
||||||
|
}
|
||||||
|
}
|
||||||
28
ingestion/internal/auth/protected_resource_test.go
Normal file
28
ingestion/internal/auth/protected_resource_test.go
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
package auth_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/auth"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestProtectedResourceHandler(t *testing.T) {
|
||||||
|
h := auth.ProtectedResourceHandler("https://brain-mcp.d-ma.be", "https://auth.d-ma.be")
|
||||||
|
req := httptest.NewRequest(http.MethodGet, "/.well-known/oauth-protected-resource", nil)
|
||||||
|
rr := httptest.NewRecorder()
|
||||||
|
h(rr, req)
|
||||||
|
|
||||||
|
assert.Equal(t, http.StatusOK, rr.Code)
|
||||||
|
assert.Equal(t, "application/json", rr.Header().Get("Content-Type"))
|
||||||
|
|
||||||
|
var body map[string]any
|
||||||
|
require.NoError(t, json.Unmarshal(rr.Body.Bytes(), &body))
|
||||||
|
assert.Equal(t, "https://brain-mcp.d-ma.be", body["resource"])
|
||||||
|
servers := body["authorization_servers"].([]any)
|
||||||
|
assert.Equal(t, "https://auth.d-ma.be", servers[0])
|
||||||
|
}
|
||||||
161
ingestion/internal/brain/index.go
Normal file
161
ingestion/internal/brain/index.go
Normal file
@@ -0,0 +1,161 @@
|
|||||||
|
package brain
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"sort"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// noteEntry describes a single note as it appears in a wing's _index.md
// table: the hall it lives in, its file slug, its display title and its
// creation date.
type noteEntry struct {
	Hall, Slug     string
	Title, Created string
}
|
||||||
|
|
||||||
|
// BuildWingIndex regenerates brain/wiki/<wing>/_index.md as a Map of
|
||||||
|
// Content listing every note in that wing with its Hall and creation
|
||||||
|
// date. Returns nil if the wing directory does not exist.
|
||||||
|
func BuildWingIndex(brainDir, wing string) error {
|
||||||
|
w := Sanitise(wing)
|
||||||
|
if w == "" {
|
||||||
|
return fmt.Errorf("invalid wing %q", wing)
|
||||||
|
}
|
||||||
|
wingDir := filepath.Join(brainDir, "wiki", w)
|
||||||
|
if _, err := os.Stat(wingDir); os.IsNotExist(err) {
|
||||||
|
return nil
|
||||||
|
} else if err != nil {
|
||||||
|
return fmt.Errorf("stat wing: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
entries, err := collectWingEntries(wingDir)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
sort.Slice(entries, func(i, j int) bool {
|
||||||
|
if entries[i].Hall != entries[j].Hall {
|
||||||
|
return entries[i].Hall < entries[j].Hall
|
||||||
|
}
|
||||||
|
return entries[i].Slug < entries[j].Slug
|
||||||
|
})
|
||||||
|
|
||||||
|
var b strings.Builder
|
||||||
|
fmt.Fprintf(&b, "# %s\n\n", w)
|
||||||
|
b.WriteString("| Hall | Note | Created |\n")
|
||||||
|
b.WriteString("|------|------|---------|\n")
|
||||||
|
for _, e := range entries {
|
||||||
|
fmt.Fprintf(&b, "| %s | [%s](%s/%s.md) | %s |\n", e.Hall, e.Title, e.Hall, e.Slug, e.Created)
|
||||||
|
}
|
||||||
|
|
||||||
|
dest := filepath.Join(wingDir, "_index.md")
|
||||||
|
return os.WriteFile(dest, []byte(b.String()), 0o644)
|
||||||
|
}
|
||||||
|
|
||||||
|
// BuildAllWingIndexes regenerates _index.md for every wing under brain/wiki/.
|
||||||
|
func BuildAllWingIndexes(brainDir string) error {
|
||||||
|
wikiDir := filepath.Join(brainDir, "wiki")
|
||||||
|
ents, err := os.ReadDir(wikiDir)
|
||||||
|
if os.IsNotExist(err) {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("read wiki: %w", err)
|
||||||
|
}
|
||||||
|
for _, e := range ents {
|
||||||
|
if !e.IsDir() {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if err := BuildWingIndex(brainDir, e.Name()); err != nil {
|
||||||
|
return fmt.Errorf("index %s: %w", e.Name(), err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func collectWingEntries(wingDir string) ([]noteEntry, error) {
|
||||||
|
var out []noteEntry
|
||||||
|
ents, err := os.ReadDir(wingDir)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("read wing: %w", err)
|
||||||
|
}
|
||||||
|
for _, hallEnt := range ents {
|
||||||
|
if !hallEnt.IsDir() {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
hall := hallEnt.Name()
|
||||||
|
if !IsValidHall(hall) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
hallDir := filepath.Join(wingDir, hall)
|
||||||
|
notes, err := os.ReadDir(hallDir)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("read hall %s: %w", hall, err)
|
||||||
|
}
|
||||||
|
for _, n := range notes {
|
||||||
|
if n.IsDir() || !strings.HasSuffix(n.Name(), ".md") || n.Name() == "_index.md" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
slug := strings.TrimSuffix(n.Name(), ".md")
|
||||||
|
full := filepath.Join(hallDir, n.Name())
|
||||||
|
title, created := readTitleAndCreated(full, slug)
|
||||||
|
out = append(out, noteEntry{Hall: hall, Slug: slug, Title: title, Created: created})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return out, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// readTitleAndCreated reads YAML frontmatter for title + created_at; falls
|
||||||
|
// back to slug and file mtime when absent.
|
||||||
|
func readTitleAndCreated(path, slug string) (string, string) {
|
||||||
|
f, err := os.Open(path)
|
||||||
|
if err != nil {
|
||||||
|
return slug, ""
|
||||||
|
}
|
||||||
|
defer func() { _ = f.Close() }()
|
||||||
|
|
||||||
|
title, created := "", ""
|
||||||
|
scanner := bufio.NewScanner(f)
|
||||||
|
inFrontmatter := false
|
||||||
|
for scanner.Scan() {
|
||||||
|
line := scanner.Text()
|
||||||
|
if strings.TrimSpace(line) == "---" {
|
||||||
|
if !inFrontmatter {
|
||||||
|
inFrontmatter = true
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if !inFrontmatter {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
key, val, ok := strings.Cut(line, ":")
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
v := strings.Trim(strings.TrimSpace(val), `"'`)
|
||||||
|
switch strings.TrimSpace(key) {
|
||||||
|
case "title":
|
||||||
|
title = v
|
||||||
|
case "created_at":
|
||||||
|
if t, err := time.Parse(time.RFC3339, v); err == nil {
|
||||||
|
created = t.UTC().Format("2006-01-02")
|
||||||
|
} else {
|
||||||
|
created = v
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if title == "" {
|
||||||
|
title = strings.ReplaceAll(slug, "-", " ")
|
||||||
|
}
|
||||||
|
if created == "" {
|
||||||
|
if info, err := os.Stat(path); err == nil {
|
||||||
|
created = info.ModTime().UTC().Format("2006-01-02")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return title, created
|
||||||
|
}
|
||||||
85
ingestion/internal/brain/index_test.go
Normal file
85
ingestion/internal/brain/index_test.go
Normal file
@@ -0,0 +1,85 @@
|
|||||||
|
package brain_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/brain"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestBuildWingIndex(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
for _, p := range []struct{ rel, body string }{
|
||||||
|
{"wiki/jepa-fx/decisions/val-vol.md", "---\ntitle: Val Vol R2\ncreated_at: 2026-05-06T10:00:00Z\n---\nbody\n"},
|
||||||
|
{"wiki/jepa-fx/facts/architecture.md", "---\ntitle: Architecture\ncreated_at: 2026-05-04T10:00:00Z\n---\nbody\n"},
|
||||||
|
{"wiki/jepa-fx/sources/paper.md", "---\n---\nbody\n"},
|
||||||
|
} {
|
||||||
|
full := filepath.Join(dir, p.rel)
|
||||||
|
require.NoError(t, os.MkdirAll(filepath.Dir(full), 0o755))
|
||||||
|
require.NoError(t, os.WriteFile(full, []byte(p.body), 0o644))
|
||||||
|
}
|
||||||
|
|
||||||
|
require.NoError(t, brain.BuildWingIndex(dir, "jepa-fx"))
|
||||||
|
|
||||||
|
got, err := os.ReadFile(filepath.Join(dir, "wiki", "jepa-fx", "_index.md"))
|
||||||
|
require.NoError(t, err)
|
||||||
|
s := string(got)
|
||||||
|
assert.Contains(t, s, "# jepa-fx")
|
||||||
|
assert.Contains(t, s, "| Hall | Note | Created |")
|
||||||
|
assert.Contains(t, s, "| decisions | [Val Vol R2](decisions/val-vol.md) | 2026-05-06 |")
|
||||||
|
assert.Contains(t, s, "| facts | [Architecture](facts/architecture.md) | 2026-05-04 |")
|
||||||
|
assert.Contains(t, s, "| sources | [paper](sources/paper.md) |")
|
||||||
|
// Halls sorted alphabetically.
|
||||||
|
assert.Less(t, indexOf(s, "decisions"), indexOf(s, "facts"))
|
||||||
|
assert.Less(t, indexOf(s, "facts"), indexOf(s, "sources"))
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBuildWingIndex_SkipsInvalidHalls(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
wingDir := filepath.Join(dir, "wiki", "jepa-fx")
|
||||||
|
require.NoError(t, os.MkdirAll(filepath.Join(wingDir, "garbage"), 0o755))
|
||||||
|
require.NoError(t, os.WriteFile(filepath.Join(wingDir, "garbage", "x.md"), []byte("x"), 0o644))
|
||||||
|
require.NoError(t, os.MkdirAll(filepath.Join(wingDir, "facts"), 0o755))
|
||||||
|
require.NoError(t, os.WriteFile(filepath.Join(wingDir, "facts", "y.md"), []byte("y"), 0o644))
|
||||||
|
|
||||||
|
require.NoError(t, brain.BuildWingIndex(dir, "jepa-fx"))
|
||||||
|
got, err := os.ReadFile(filepath.Join(wingDir, "_index.md"))
|
||||||
|
require.NoError(t, err)
|
||||||
|
s := string(got)
|
||||||
|
assert.Contains(t, s, "facts")
|
||||||
|
assert.NotContains(t, s, "garbage")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBuildAllWingIndexes(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
for _, p := range []struct{ rel, body string }{
|
||||||
|
{"wiki/a/facts/x.md", "x"},
|
||||||
|
{"wiki/b/facts/y.md", "y"},
|
||||||
|
} {
|
||||||
|
full := filepath.Join(dir, p.rel)
|
||||||
|
require.NoError(t, os.MkdirAll(filepath.Dir(full), 0o755))
|
||||||
|
require.NoError(t, os.WriteFile(full, []byte(p.body), 0o644))
|
||||||
|
}
|
||||||
|
require.NoError(t, brain.BuildAllWingIndexes(dir))
|
||||||
|
_, err := os.Stat(filepath.Join(dir, "wiki", "a", "_index.md"))
|
||||||
|
require.NoError(t, err)
|
||||||
|
_, err = os.Stat(filepath.Join(dir, "wiki", "b", "_index.md"))
|
||||||
|
require.NoError(t, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBuildWingIndex_NoWingDir(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
require.NoError(t, brain.BuildWingIndex(dir, "ghost"))
|
||||||
|
}
|
||||||
|
|
||||||
|
// indexOf returns the byte offset of the first occurrence of sub in s, or -1
// when sub does not occur. An empty sub is found at offset 0.
func indexOf(s, sub string) int {
	width := len(sub)
	lastStart := len(s) - width
	for start := 0; start <= lastStart; start++ {
		if s[start:start+width] == sub {
			return start
		}
	}
	return -1
}
|
||||||
70
ingestion/internal/brain/path.go
Normal file
70
ingestion/internal/brain/path.go
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
// Package brain provides the wing/hall path taxonomy used by the brain
|
||||||
|
// wiki layout. A note's canonical location is
|
||||||
|
// brain/wiki/<wing>/<hall>/<slug>.md, where Wing is a free-form topic
|
||||||
|
// domain and Hall is one of a closed vocabulary of memory types.
|
||||||
|
package brain
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ValidHalls enumerates the only hall names a note may be filed under. The
// hall records what kind of memory a note is, independently of its wing.
var ValidHalls = map[string]bool{
	"decisions":  true,
	"facts":      true,
	"failures":   true,
	"hypotheses": true,
	"sources":    true,
}
|
||||||
|
|
||||||
|
// IsValidHall reports whether h is in the closed Hall vocabulary.
|
||||||
|
func IsValidHall(h string) bool {
|
||||||
|
return ValidHalls[h]
|
||||||
|
}
|
||||||
|
|
||||||
|
// NotePath resolves the canonical filesystem path for a note given a
|
||||||
|
// wing, hall, and slug. Returns an error if hall is not in ValidHalls
|
||||||
|
// or if wing/slug sanitise to empty strings.
|
||||||
|
//
|
||||||
|
// The returned path is brain/wiki/<wing>/<hall>/<slug>.md with all
|
||||||
|
// segments sanitised: lowercased, alphanumerics and hyphens only.
|
||||||
|
func NotePath(brainDir, wing, hall, slug string) (string, error) {
|
||||||
|
if !IsValidHall(hall) {
|
||||||
|
return "", fmt.Errorf("invalid hall %q: must be one of facts/decisions/failures/hypotheses/sources", hall)
|
||||||
|
}
|
||||||
|
w := Sanitise(wing)
|
||||||
|
if w == "" {
|
||||||
|
return "", fmt.Errorf("invalid wing %q: must contain at least one alphanumeric character", wing)
|
||||||
|
}
|
||||||
|
s := Sanitise(strings.TrimSuffix(slug, ".md"))
|
||||||
|
if s == "" {
|
||||||
|
return "", fmt.Errorf("invalid slug %q: must contain at least one alphanumeric character", slug)
|
||||||
|
}
|
||||||
|
return filepath.Join(brainDir, "wiki", w, hall, s+".md"), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sanitise reduces s to a path-safe token made only of [a-z0-9-].
//
// The input is trimmed and lowercased. ASCII letters and digits are kept.
// Each run of separator characters (hyphen, underscore, space, slash,
// backslash, dot) becomes a single hyphen, and separators at the start or end
// produce nothing. Every other character is dropped without leaving a
// hyphen, so "a!b" yields "ab". The result is empty when s contains no ASCII
// letter or digit.
func Sanitise(s string) string {
	var out []byte
	// separatorPending is set when a separator has been seen since the last
	// kept character; the hyphen is only emitted once another kept character
	// follows, which drops leading and trailing separators.
	separatorPending := false

	for _, r := range strings.ToLower(strings.TrimSpace(s)) {
		if (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') {
			if separatorPending && len(out) > 0 {
				out = append(out, '-')
			}
			separatorPending = false
			out = append(out, byte(r))
			continue
		}
		switch r {
		case '-', '_', ' ', '/', '\\', '.':
			separatorPending = true
		}
	}
	return string(out)
}
|
||||||
73
ingestion/internal/brain/path_test.go
Normal file
73
ingestion/internal/brain/path_test.go
Normal file
@@ -0,0 +1,73 @@
|
|||||||
|
package brain_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/brain"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestNotePath_Valid(t *testing.T) {
|
||||||
|
got, err := brain.NotePath("/b", "jepa-fx", "decisions", "val-vol-r2")
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, filepath.Join("/b", "wiki", "jepa-fx", "decisions", "val-vol-r2.md"), got)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNotePath_StripsMdSuffix(t *testing.T) {
|
||||||
|
got, err := brain.NotePath("/b", "x", "facts", "note.md")
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, filepath.Join("/b", "wiki", "x", "facts", "note.md"), got)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNotePath_SanitisesWingAndSlug(t *testing.T) {
|
||||||
|
got, err := brain.NotePath("/b", "Jepa FX!", "facts", "Val Vol R2")
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, filepath.Join("/b", "wiki", "jepa-fx", "facts", "val-vol-r2.md"), got)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNotePath_RejectsInvalidHall(t *testing.T) {
|
||||||
|
_, err := brain.NotePath("/b", "x", "garbage", "y")
|
||||||
|
require.Error(t, err)
|
||||||
|
assert.Contains(t, err.Error(), "invalid hall")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNotePath_RejectsEmptyWing(t *testing.T) {
|
||||||
|
_, err := brain.NotePath("/b", "!!!", "facts", "y")
|
||||||
|
require.Error(t, err)
|
||||||
|
assert.Contains(t, err.Error(), "invalid wing")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNotePath_RejectsEmptySlug(t *testing.T) {
|
||||||
|
_, err := brain.NotePath("/b", "x", "facts", "!!!")
|
||||||
|
require.Error(t, err)
|
||||||
|
assert.Contains(t, err.Error(), "invalid slug")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSanitise(t *testing.T) {
|
||||||
|
cases := map[string]string{
|
||||||
|
"Jepa-FX": "jepa-fx",
|
||||||
|
" foo bar ": "foo-bar",
|
||||||
|
"Val/Vol\\R2.md": "val-vol-r2-md",
|
||||||
|
"!!!": "",
|
||||||
|
"___leading": "leading",
|
||||||
|
"trailing___": "trailing",
|
||||||
|
"multi---hyphen": "multi-hyphen",
|
||||||
|
"UPPER 123 mixed": "upper-123-mixed",
|
||||||
|
}
|
||||||
|
for in, want := range cases {
|
||||||
|
t.Run(in, func(t *testing.T) {
|
||||||
|
assert.Equal(t, want, brain.Sanitise(in))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestIsValidHall(t *testing.T) {
|
||||||
|
for _, h := range []string{"facts", "decisions", "failures", "hypotheses", "sources"} {
|
||||||
|
assert.True(t, brain.IsValidHall(h), h)
|
||||||
|
}
|
||||||
|
for _, h := range []string{"", "Facts", "facts ", "rooms", "concepts", "entities"} {
|
||||||
|
assert.False(t, brain.IsValidHall(h), h)
|
||||||
|
}
|
||||||
|
}
|
||||||
286
ingestion/internal/brain/tunnel.go
Normal file
286
ingestion/internal/brain/tunnel.go
Normal file
@@ -0,0 +1,286 @@
|
|||||||
|
package brain
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// seeAlsoHeader is the Markdown heading under which cross-wing links are
// grouped inside a note.
const (
	seeAlsoHeader = "## See also"
)
|
||||||
|
|
||||||
|
// TunnelCandidate is one potential cross-wing link reported by DetectTunnels.
// Nothing has been written yet at this point: the caller decides whether the
// match is trustworthy enough to commit with WriteTunnel.
type TunnelCandidate struct {
	// TargetPath locates the matched note relative to brainDir, using
	// forward slashes, e.g. "wiki/hyperguild/decisions/routing.md".
	TargetPath string

	// MatchedTerm is the note title that was found in the source content.
	MatchedTerm string

	// Exact reports a case-insensitive whole-token match of the target's
	// title. When false the title was only found as a substring; such fuzzy
	// matches should not be written automatically.
	Exact bool
}
|
||||||
|
|
||||||
|
// DetectTunnels scans brain/wiki/ for notes whose title appears in
|
||||||
|
// content. Returns one TunnelCandidate per matching note. Exact is true
|
||||||
|
// when content contains the title as a whole-word case-insensitive
|
||||||
|
// token; false when only a substring matched (caller treats these as
|
||||||
|
// fuzzy and should not auto-write them).
|
||||||
|
//
|
||||||
|
// A note's title is read from YAML frontmatter `title:`; failing that,
|
||||||
|
// the filename slug (sans `.md`, hyphens → spaces) is used.
|
||||||
|
func DetectTunnels(brainDir, content string) ([]TunnelCandidate, error) {
|
||||||
|
wikiDir := filepath.Join(brainDir, "wiki")
|
||||||
|
if _, err := os.Stat(wikiDir); os.IsNotExist(err) {
|
||||||
|
return nil, nil
|
||||||
|
} else if err != nil {
|
||||||
|
return nil, fmt.Errorf("stat wiki: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
lowerContent := strings.ToLower(content)
|
||||||
|
|
||||||
|
var out []TunnelCandidate
|
||||||
|
err := filepath.WalkDir(wikiDir, func(path string, d os.DirEntry, err error) error {
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if d.IsDir() || !strings.HasSuffix(path, ".md") || d.Name() == "_index.md" {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
title, _ := readTitleAndCreated(path, strings.TrimSuffix(d.Name(), ".md"))
|
||||||
|
needle := strings.ToLower(strings.TrimSpace(title))
|
||||||
|
if needle == "" {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
idx := strings.Index(lowerContent, needle)
|
||||||
|
if idx == -1 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
rel, err := filepath.Rel(brainDir, path)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
out = append(out, TunnelCandidate{
|
||||||
|
TargetPath: filepath.ToSlash(rel),
|
||||||
|
MatchedTerm: title,
|
||||||
|
Exact: isWholeWord(lowerContent, idx, len(needle)),
|
||||||
|
})
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return out, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// isWholeWord reports whether the substring at [idx, idx+n) in s is
|
||||||
|
// bounded by non-alphanumeric characters (or string edges).
|
||||||
|
func isWholeWord(s string, idx, n int) bool {
|
||||||
|
left := idx == 0 || !isWordByte(s[idx-1])
|
||||||
|
right := idx+n == len(s) || !isWordByte(s[idx+n])
|
||||||
|
return left && right
|
||||||
|
}
|
||||||
|
|
||||||
|
// isWordByte reports whether b is an ASCII letter (either case) or digit.
func isWordByte(b byte) bool {
	switch {
	case 'a' <= b && b <= 'z':
		return true
	case 'A' <= b && b <= 'Z':
		return true
	case '0' <= b && b <= '9':
		return true
	default:
		return false
	}
}
|
||||||
|
|
||||||
|
// AutoTunnel runs DetectTunnels against content and, for each
|
||||||
|
// candidate, either writes a bidirectional tunnel (when the match is
|
||||||
|
// exact and in a different wing) or stages it for human review in
|
||||||
|
// brain/raw/tunnel-candidates-<YYYY-MM-DD>.md.
|
||||||
|
//
|
||||||
|
// sourcePath is the note that originated the content — used to skip
|
||||||
|
// self-matches and same-wing tunnels. Errors writing individual
|
||||||
|
// tunnels are recorded into the candidates file but never abort the
|
||||||
|
// rest of the scan; the caller's primary write has already succeeded
|
||||||
|
// and auto-linking is best-effort.
|
||||||
|
func AutoTunnel(brainDir, sourcePath, content string) error {
|
||||||
|
srcWing, err := wingOf(sourcePath)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
candidates, err := DetectTunnels(brainDir, content)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
var fuzzy []TunnelCandidate
|
||||||
|
for _, c := range candidates {
|
||||||
|
if c.TargetPath == sourcePath {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
tgtWing, err := wingOf(c.TargetPath)
|
||||||
|
if err != nil || tgtWing == srcWing {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if !c.Exact {
|
||||||
|
fuzzy = append(fuzzy, c)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if err := WriteTunnel(brainDir, sourcePath, c.TargetPath); err != nil {
|
||||||
|
fuzzy = append(fuzzy, c)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return logFuzzyCandidates(brainDir, sourcePath, fuzzy)
|
||||||
|
}
|
||||||
|
|
||||||
|
// logFuzzyCandidates appends one row per candidate to
|
||||||
|
// brain/raw/tunnel-candidates-<YYYY-MM-DD>.md, creating the file with a
|
||||||
|
// header on first write of the day. No-op when the candidate list is empty.
|
||||||
|
func logFuzzyCandidates(brainDir, sourcePath string, cs []TunnelCandidate) error {
|
||||||
|
if len(cs) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
rawDir := filepath.Join(brainDir, "raw")
|
||||||
|
if err := os.MkdirAll(rawDir, 0o755); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
stamp := time.Now().UTC().Format("2006-01-02")
|
||||||
|
path := filepath.Join(rawDir, "tunnel-candidates-"+stamp+".md")
|
||||||
|
existed := fileExists(path)
|
||||||
|
f, err := os.OpenFile(path, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o644)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer func() { _ = f.Close() }()
|
||||||
|
if !existed {
|
||||||
|
if _, err := f.WriteString("# Tunnel candidates " + stamp + "\n\nFuzzy cross-wing matches surfaced by AutoTunnel. Review and promote to a tunnel via `brain_tunnel` if relevant.\n\n"); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for _, c := range cs {
|
||||||
|
line := fmt.Sprintf("- `%s` ↔ `%s` (term: %q)\n", sourcePath, c.TargetPath, c.MatchedTerm)
|
||||||
|
if _, err := f.WriteString(line); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func fileExists(p string) bool {
|
||||||
|
_, err := os.Stat(p)
|
||||||
|
return err == nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// WriteTunnel appends a bidirectional wikilink between sourcePath and
|
||||||
|
// targetPath under a `## See also` section in each note. Paths are
|
||||||
|
// relative to brainDir (forward-slashed), e.g. wiki/<wing>/<hall>/<slug>.md.
|
||||||
|
//
|
||||||
|
// Idempotent: re-calling with the same pair does not duplicate links or
|
||||||
|
// section headers. Rejects same-wing pairs (a tunnel is by definition
|
||||||
|
// cross-wing) and missing notes.
|
||||||
|
func WriteTunnel(brainDir, sourcePath, targetPath string) error {
|
||||||
|
srcWing, err := wingOf(sourcePath)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("source: %w", err)
|
||||||
|
}
|
||||||
|
tgtWing, err := wingOf(targetPath)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("target: %w", err)
|
||||||
|
}
|
||||||
|
if srcWing == tgtWing {
|
||||||
|
return fmt.Errorf("tunnel must cross wings; got both in %q", srcWing)
|
||||||
|
}
|
||||||
|
|
||||||
|
srcFull := filepath.Join(brainDir, filepath.FromSlash(sourcePath))
|
||||||
|
tgtFull := filepath.Join(brainDir, filepath.FromSlash(targetPath))
|
||||||
|
if _, err := os.Stat(srcFull); err != nil {
|
||||||
|
return fmt.Errorf("source note: %w", err)
|
||||||
|
}
|
||||||
|
if _, err := os.Stat(tgtFull); err != nil {
|
||||||
|
return fmt.Errorf("target note: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := appendSeeAlso(srcFull, wikilinkOf(targetPath)); err != nil {
|
||||||
|
return fmt.Errorf("update source: %w", err)
|
||||||
|
}
|
||||||
|
if err := appendSeeAlso(tgtFull, wikilinkOf(sourcePath)); err != nil {
|
||||||
|
return fmt.Errorf("update target: %w", err)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// wikilinkOf converts a relative note path such as
// "wiki/<wing>/<hall>/<slug>.md" into the "<wing>/<hall>/<slug>" form that
// goes between `[[` and `]]`. A trailing ".md" and a leading "wiki/" are
// each removed only when present.
func wikilinkOf(relPath string) string {
	return strings.TrimPrefix(strings.TrimSuffix(relPath, ".md"), "wiki/")
}
|
||||||
|
|
||||||
|
// wingOf returns the <wing> segment of a relative wiki path shaped like
// "wiki/<wing>/<hall>/<slug>.md". It fails when the path has fewer than four
// slash-separated segments, does not start with "wiki", or has an empty wing.
func wingOf(relPath string) (string, error) {
	// Four pieces are enough to tell whether the path is deep enough;
	// anything past the third slash stays in the last piece.
	segs := strings.SplitN(relPath, "/", 4)
	switch {
	case len(segs) < 4 || segs[0] != "wiki":
		return "", fmt.Errorf("not a wiki path: %q", relPath)
	case segs[1] == "":
		return "", fmt.Errorf("empty wing in path: %q", relPath)
	}
	return segs[1], nil
}
|
||||||
|
|
||||||
|
// appendSeeAlso inserts `- [[link]]` under the file's See also section,
|
||||||
|
// creating the section if absent. No-op when the link is already present.
|
||||||
|
func appendSeeAlso(filePath, link string) error {
|
||||||
|
content, err := os.ReadFile(filePath)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
wikilink := "[[" + link + "]]"
|
||||||
|
if strings.Contains(string(content), wikilink) {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
bullet := "- " + wikilink
|
||||||
|
|
||||||
|
if !strings.Contains(string(content), seeAlsoHeader) {
|
||||||
|
// No section yet — append a fresh one. Always emit a trailing
|
||||||
|
// newline so subsequent appends don't merge into the previous line.
|
||||||
|
trimmed := strings.TrimRight(string(content), "\n")
|
||||||
|
out := trimmed + "\n\n" + seeAlsoHeader + "\n\n" + bullet + "\n"
|
||||||
|
return os.WriteFile(filePath, []byte(out), 0o644)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Section exists — splice the bullet in just before the next `## `
|
||||||
|
// heading (or EOF). Reading the file line-by-line keeps this robust
|
||||||
|
// against arbitrary section ordering.
|
||||||
|
var b strings.Builder
|
||||||
|
scanner := bufio.NewScanner(strings.NewReader(string(content)))
|
||||||
|
scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)
|
||||||
|
inSeeAlso, inserted := false, false
|
||||||
|
for scanner.Scan() {
|
||||||
|
line := scanner.Text()
|
||||||
|
if !inserted && inSeeAlso && strings.HasPrefix(line, "## ") &&
|
||||||
|
strings.TrimSpace(line) != seeAlsoHeader {
|
||||||
|
b.WriteString(bullet)
|
||||||
|
b.WriteByte('\n')
|
||||||
|
b.WriteByte('\n')
|
||||||
|
inserted = true
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(line) == seeAlsoHeader {
|
||||||
|
inSeeAlso = true
|
||||||
|
}
|
||||||
|
b.WriteString(line)
|
||||||
|
b.WriteByte('\n')
|
||||||
|
}
|
||||||
|
if err := scanner.Err(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if !inserted {
|
||||||
|
// section was the last thing in the file — just append bullet
|
||||||
|
out := strings.TrimRight(b.String(), "\n") + "\n" + bullet + "\n"
|
||||||
|
return os.WriteFile(filePath, []byte(out), 0o644)
|
||||||
|
}
|
||||||
|
return os.WriteFile(filePath, []byte(b.String()), 0o644)
|
||||||
|
}
|
||||||
177
ingestion/internal/brain/tunnel_test.go
Normal file
177
ingestion/internal/brain/tunnel_test.go
Normal file
@@ -0,0 +1,177 @@
|
|||||||
|
package brain_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/brain"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
// seedNote writes a minimal markdown note at brainDir/relPath with the given body.
|
||||||
|
func seedNote(t *testing.T, brainDir, relPath, body string) {
|
||||||
|
t.Helper()
|
||||||
|
full := filepath.Join(brainDir, relPath)
|
||||||
|
require.NoError(t, os.MkdirAll(filepath.Dir(full), 0o755))
|
||||||
|
require.NoError(t, os.WriteFile(full, []byte(body), 0o644))
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestWriteTunnel_AppendsBidirectionalLinks(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
seedNote(t, dir, "wiki/jepa-fx/decisions/val-vol.md",
|
||||||
|
"---\nwing: jepa-fx\nhall: decisions\n---\n# Val Vol R2\n\nbody.\n")
|
||||||
|
seedNote(t, dir, "wiki/hyperguild/decisions/routing.md",
|
||||||
|
"---\nwing: hyperguild\nhall: decisions\n---\n# Routing\n\nbody.\n")
|
||||||
|
|
||||||
|
err := brain.WriteTunnel(dir,
|
||||||
|
"wiki/jepa-fx/decisions/val-vol.md",
|
||||||
|
"wiki/hyperguild/decisions/routing.md",
|
||||||
|
)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
src, err := os.ReadFile(filepath.Join(dir, "wiki/jepa-fx/decisions/val-vol.md"))
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Contains(t, string(src), "## See also")
|
||||||
|
assert.Contains(t, string(src), "[[hyperguild/decisions/routing]]")
|
||||||
|
|
||||||
|
tgt, err := os.ReadFile(filepath.Join(dir, "wiki/hyperguild/decisions/routing.md"))
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Contains(t, string(tgt), "## See also")
|
||||||
|
assert.Contains(t, string(tgt), "[[jepa-fx/decisions/val-vol]]")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestWriteTunnel_Idempotent(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
seedNote(t, dir, "wiki/a/facts/x.md", "# X\n\nbody.\n")
|
||||||
|
seedNote(t, dir, "wiki/b/facts/y.md", "# Y\n\nbody.\n")
|
||||||
|
|
||||||
|
for i := 0; i < 3; i++ {
|
||||||
|
require.NoError(t, brain.WriteTunnel(dir,
|
||||||
|
"wiki/a/facts/x.md", "wiki/b/facts/y.md"))
|
||||||
|
}
|
||||||
|
|
||||||
|
src, err := os.ReadFile(filepath.Join(dir, "wiki/a/facts/x.md"))
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, 1, strings.Count(string(src), "[[b/facts/y]]"),
|
||||||
|
"link should appear exactly once after 3 calls")
|
||||||
|
assert.Equal(t, 1, strings.Count(string(src), "## See also"))
|
||||||
|
|
||||||
|
tgt, err := os.ReadFile(filepath.Join(dir, "wiki/b/facts/y.md"))
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, 1, strings.Count(string(tgt), "[[a/facts/x]]"))
|
||||||
|
assert.Equal(t, 1, strings.Count(string(tgt), "## See also"))
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestWriteTunnel_RejectsSameWing(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
seedNote(t, dir, "wiki/jepa-fx/facts/x.md", "x")
|
||||||
|
seedNote(t, dir, "wiki/jepa-fx/facts/y.md", "y")
|
||||||
|
err := brain.WriteTunnel(dir,
|
||||||
|
"wiki/jepa-fx/facts/x.md", "wiki/jepa-fx/facts/y.md")
|
||||||
|
require.Error(t, err)
|
||||||
|
assert.Contains(t, err.Error(), "cross wings")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestWriteTunnel_RejectsMissingNote(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
seedNote(t, dir, "wiki/a/facts/x.md", "x")
|
||||||
|
err := brain.WriteTunnel(dir,
|
||||||
|
"wiki/a/facts/x.md", "wiki/b/facts/ghost.md")
|
||||||
|
require.Error(t, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDetectTunnels_ExactTitleMatch(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
seedNote(t, dir, "wiki/jepa-fx/decisions/val-vol.md",
|
||||||
|
"---\nwing: jepa-fx\nhall: decisions\ntitle: Val Vol R2\n---\nbody.\n")
|
||||||
|
seedNote(t, dir, "wiki/jepa-fx/facts/lejpa.md",
|
||||||
|
"---\nwing: jepa-fx\nhall: facts\ntitle: LeJPA Architecture\n---\nbody.\n")
|
||||||
|
|
||||||
|
candidates, err := brain.DetectTunnels(dir,
|
||||||
|
"We need to revisit Val Vol R2 in light of new tier data.")
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
require.Len(t, candidates, 1)
|
||||||
|
assert.Equal(t, "wiki/jepa-fx/decisions/val-vol.md", candidates[0].TargetPath)
|
||||||
|
assert.Equal(t, "Val Vol R2", candidates[0].MatchedTerm)
|
||||||
|
assert.True(t, candidates[0].Exact)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDetectTunnels_FuzzyMatch(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
seedNote(t, dir, "wiki/x/facts/routing.md",
|
||||||
|
"---\ntitle: Routing\n---\nbody.\n")
|
||||||
|
|
||||||
|
// Substring of title appears in content, but not as a whole word.
|
||||||
|
candidates, err := brain.DetectTunnels(dir, "rerouting handles failover")
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.Len(t, candidates, 1)
|
||||||
|
assert.False(t, candidates[0].Exact, "substring-only match should be fuzzy")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDetectTunnels_NoFrontmatterFallsBackToSlug(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
seedNote(t, dir, "wiki/x/facts/widget-flags.md", "# widget flags\n\nbody.\n")
|
||||||
|
|
||||||
|
candidates, err := brain.DetectTunnels(dir,
|
||||||
|
"Documented Widget Flags after the deploy issue.")
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.Len(t, candidates, 1)
|
||||||
|
assert.True(t, candidates[0].Exact)
|
||||||
|
assert.Equal(t, "widget flags", candidates[0].MatchedTerm)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestAutoTunnel_FuzzyGoesToCandidatesFile(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
// Existing note in a different wing whose title is "Routing".
|
||||||
|
seedNote(t, dir, "wiki/other/facts/routing.md",
|
||||||
|
"---\nwing: other\nhall: facts\ntitle: Routing\n---\nbody.\n")
|
||||||
|
// Source note in another wing whose body mentions "rerouting" (substring match only).
|
||||||
|
seedNote(t, dir, "wiki/jepa-fx/facts/new.md",
|
||||||
|
"---\nwing: jepa-fx\nhall: facts\n---\nrerouting traffic\n")
|
||||||
|
|
||||||
|
require.NoError(t, brain.AutoTunnel(dir,
|
||||||
|
"wiki/jepa-fx/facts/new.md", "rerouting traffic"))
|
||||||
|
|
||||||
|
// Source must not get auto-linked (fuzzy).
|
||||||
|
got, err := os.ReadFile(filepath.Join(dir, "wiki/jepa-fx/facts/new.md"))
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.NotContains(t, string(got), "[[other/facts/routing]]")
|
||||||
|
|
||||||
|
// Candidates file must list the pair.
|
||||||
|
matches, err := filepath.Glob(filepath.Join(dir, "raw", "tunnel-candidates-*.md"))
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.Len(t, matches, 1)
|
||||||
|
body, err := os.ReadFile(matches[0])
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Contains(t, string(body), "wiki/jepa-fx/facts/new.md")
|
||||||
|
assert.Contains(t, string(body), "wiki/other/facts/routing.md")
|
||||||
|
assert.Contains(t, string(body), "Routing")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDetectTunnels_EmptyWiki(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
cs, err := brain.DetectTunnels(dir, "anything")
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Empty(t, cs)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestWriteTunnel_AppendsToExistingSeeAlso(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
seedNote(t, dir, "wiki/a/facts/x.md",
|
||||||
|
"# X\n\nbody.\n\n## See also\n\n- [[a/facts/old]]\n")
|
||||||
|
seedNote(t, dir, "wiki/b/facts/y.md", "# Y\n\nbody.\n")
|
||||||
|
|
||||||
|
require.NoError(t, brain.WriteTunnel(dir,
|
||||||
|
"wiki/a/facts/x.md", "wiki/b/facts/y.md"))
|
||||||
|
|
||||||
|
src, err := os.ReadFile(filepath.Join(dir, "wiki/a/facts/x.md"))
|
||||||
|
require.NoError(t, err)
|
||||||
|
s := string(src)
|
||||||
|
assert.Equal(t, 1, strings.Count(s, "## See also"), "should reuse existing section")
|
||||||
|
assert.Contains(t, s, "[[a/facts/old]]")
|
||||||
|
assert.Contains(t, s, "[[b/facts/y]]")
|
||||||
|
}
|
||||||
76
ingestion/internal/embed/embed.go
Normal file
76
ingestion/internal/embed/embed.go
Normal file
@@ -0,0 +1,76 @@
|
|||||||
|
// Package embed produces dense vector embeddings for brain content.
|
||||||
|
//
|
||||||
|
// Wire format is Ollama's `/api/embed`, with the canonical request shape
|
||||||
|
// `{"model": "...", "input": "..."}` and a 2-D `embeddings` response.
|
||||||
|
// Default deployment runs `nomic-embed-text` on iguana, which returns
|
||||||
|
// 768-dim vectors compatible with the brain_embeddings table schema.
|
||||||
|
package embed
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Client posts embedding requests to an Ollama-compatible endpoint.
|
||||||
|
type Client struct {
|
||||||
|
URL string
|
||||||
|
Model string
|
||||||
|
HTTP *http.Client
|
||||||
|
}
|
||||||
|
|
||||||
|
// New constructs a Client. Returns nil when url is empty so callers can
|
||||||
|
// treat a missing BRAIN_EMBED_URL as "feature disabled" via a single nil
|
||||||
|
// check.
|
||||||
|
func New(url, model string) *Client {
|
||||||
|
if url == "" {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return &Client{
|
||||||
|
URL: strings.TrimRight(url, "/"),
|
||||||
|
Model: model,
|
||||||
|
HTTP: &http.Client{Timeout: 30 * time.Second},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Embed returns the embedding vector for text. Empty text is rejected
|
||||||
|
// up-front to keep upstream errors from masking caller mistakes.
|
||||||
|
func (c *Client) Embed(ctx context.Context, text string) ([]float32, error) {
|
||||||
|
if strings.TrimSpace(text) == "" {
|
||||||
|
return nil, fmt.Errorf("embed: empty text")
|
||||||
|
}
|
||||||
|
reqBody, _ := json.Marshal(map[string]any{
|
||||||
|
"model": c.Model,
|
||||||
|
"input": text,
|
||||||
|
})
|
||||||
|
req, err := http.NewRequestWithContext(ctx, http.MethodPost,
|
||||||
|
c.URL+"/api/embed", bytes.NewReader(reqBody))
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
req.Header.Set("Content-Type", "application/json")
|
||||||
|
resp, err := c.HTTP.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer func() { _ = resp.Body.Close() }()
|
||||||
|
if resp.StatusCode/100 != 2 {
|
||||||
|
body, _ := io.ReadAll(resp.Body)
|
||||||
|
return nil, fmt.Errorf("embed: status %d: %s", resp.StatusCode, string(body))
|
||||||
|
}
|
||||||
|
var out struct {
|
||||||
|
Embeddings [][]float32 `json:"embeddings"`
|
||||||
|
}
|
||||||
|
if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
|
||||||
|
return nil, fmt.Errorf("embed: decode: %w", err)
|
||||||
|
}
|
||||||
|
if len(out.Embeddings) == 0 || len(out.Embeddings[0]) == 0 {
|
||||||
|
return nil, fmt.Errorf("embed: empty embeddings in response")
|
||||||
|
}
|
||||||
|
return out.Embeddings[0], nil
|
||||||
|
}
|
||||||
74
ingestion/internal/embed/embed_test.go
Normal file
74
ingestion/internal/embed/embed_test.go
Normal file
@@ -0,0 +1,74 @@
|
|||||||
|
package embed_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/embed"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestNew_EmptyURLReturnsNil(t *testing.T) {
|
||||||
|
assert.Nil(t, embed.New("", "model"))
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestEmbed_ReturnsVector(t *testing.T) {
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
assert.Equal(t, "/api/embed", r.URL.Path)
|
||||||
|
var req map[string]any
|
||||||
|
require.NoError(t, json.NewDecoder(r.Body).Decode(&req))
|
||||||
|
assert.Equal(t, "nomic", req["model"])
|
||||||
|
assert.Equal(t, "hello", req["input"])
|
||||||
|
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||||
|
"embeddings": [][]float32{{0.1, 0.2, 0.3}},
|
||||||
|
})
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
c := embed.New(srv.URL, "nomic")
|
||||||
|
require.NotNil(t, c)
|
||||||
|
v, err := c.Embed(context.Background(), "hello")
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, []float32{0.1, 0.2, 0.3}, v)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestEmbed_StripsTrailingSlashFromURL(t *testing.T) {
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
assert.Equal(t, "/api/embed", r.URL.Path)
|
||||||
|
_ = json.NewEncoder(w).Encode(map[string]any{"embeddings": [][]float32{{1.0}}})
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
c := embed.New(srv.URL+"/", "nomic")
|
||||||
|
_, err := c.Embed(context.Background(), "x")
|
||||||
|
require.NoError(t, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestEmbed_PropagatesUpstreamError(t *testing.T) {
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
|
||||||
|
w.WriteHeader(http.StatusBadGateway)
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
c := embed.New(srv.URL, "m")
|
||||||
|
_, err := c.Embed(context.Background(), "x")
|
||||||
|
require.Error(t, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestEmbed_RejectsEmptyEmbeddingsArray(t *testing.T) {
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
|
||||||
|
_ = json.NewEncoder(w).Encode(map[string]any{"embeddings": [][]float32{}})
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
c := embed.New(srv.URL, "m")
|
||||||
|
_, err := c.Embed(context.Background(), "x")
|
||||||
|
require.Error(t, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestEmbed_RejectsEmptyText(t *testing.T) {
|
||||||
|
c := embed.New("http://127.0.0.1:1", "m")
|
||||||
|
_, err := c.Embed(context.Background(), "")
|
||||||
|
require.Error(t, err)
|
||||||
|
}
|
||||||
29
ingestion/internal/llm/router.go
Normal file
29
ingestion/internal/llm/router.go
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
package llm
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Router calls Primary first; on any error falls back to Fallback.
|
||||||
|
// Fallback may be nil, in which case primary errors are returned directly.
|
||||||
|
type Router struct {
|
||||||
|
Primary *Client
|
||||||
|
Fallback *Client
|
||||||
|
}
|
||||||
|
|
||||||
|
// Complete implements pipeline.CompleteFunc, routing through Primary then Fallback.
|
||||||
|
func (r *Router) Complete(ctx context.Context, system, user string) (string, error) {
|
||||||
|
out, err := r.Primary.Complete(ctx, system, user)
|
||||||
|
if err == nil {
|
||||||
|
return out, nil
|
||||||
|
}
|
||||||
|
if r.Fallback == nil {
|
||||||
|
return "", fmt.Errorf("primary llm: %w", err)
|
||||||
|
}
|
||||||
|
out, err2 := r.Fallback.Complete(ctx, system, user)
|
||||||
|
if err2 != nil {
|
||||||
|
return "", fmt.Errorf("primary llm: %w; fallback llm: %v", err, err2)
|
||||||
|
}
|
||||||
|
return out, nil
|
||||||
|
}
|
||||||
71
ingestion/internal/llm/router_test.go
Normal file
71
ingestion/internal/llm/router_test.go
Normal file
@@ -0,0 +1,71 @@
|
|||||||
|
package llm
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestRouter_PrimarySucceeds(t *testing.T) {
|
||||||
|
primary := mockServer(t, "from-primary")
|
||||||
|
defer primary.Close()
|
||||||
|
fallback := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
t.Error("fallback must not be called when primary succeeds")
|
||||||
|
}))
|
||||||
|
defer fallback.Close()
|
||||||
|
|
||||||
|
r := &Router{
|
||||||
|
Primary: New(primary.URL, "", "m", time.Second),
|
||||||
|
Fallback: New(fallback.URL, "", "m", time.Second),
|
||||||
|
}
|
||||||
|
out, err := r.Complete(context.Background(), "sys", "user")
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, "from-primary", out)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRouter_FallsBackOnPrimaryError(t *testing.T) {
|
||||||
|
primary := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
http.Error(w, "unavailable", http.StatusServiceUnavailable)
|
||||||
|
}))
|
||||||
|
defer primary.Close()
|
||||||
|
fallback := mockServer(t, "from-fallback")
|
||||||
|
defer fallback.Close()
|
||||||
|
|
||||||
|
r := &Router{
|
||||||
|
Primary: New(primary.URL, "", "m", time.Second),
|
||||||
|
Fallback: New(fallback.URL, "", "m", time.Second),
|
||||||
|
}
|
||||||
|
out, err := r.Complete(context.Background(), "sys", "user")
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, "from-fallback", out)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRouter_BothFail(t *testing.T) {
|
||||||
|
fail := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
http.Error(w, "err", http.StatusBadGateway)
|
||||||
|
}))
|
||||||
|
defer fail.Close()
|
||||||
|
|
||||||
|
r := &Router{
|
||||||
|
Primary: New(fail.URL, "", "m", time.Second),
|
||||||
|
Fallback: New(fail.URL, "", "m", time.Second),
|
||||||
|
}
|
||||||
|
_, err := r.Complete(context.Background(), "sys", "user")
|
||||||
|
assert.Error(t, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRouter_NilFallback(t *testing.T) {
|
||||||
|
fail := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
http.Error(w, "err", http.StatusBadGateway)
|
||||||
|
}))
|
||||||
|
defer fail.Close()
|
||||||
|
|
||||||
|
r := &Router{Primary: New(fail.URL, "", "m", time.Second)}
|
||||||
|
_, err := r.Complete(context.Background(), "sys", "user")
|
||||||
|
assert.Error(t, err)
|
||||||
|
}
|
||||||
@@ -1,23 +1,65 @@
|
|||||||
package mcp
|
package mcp
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"crypto/subtle"
|
||||||
"net/http"
|
"net/http"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/auth"
|
||||||
)
|
)
|
||||||
|
|
||||||
// BearerAuth returns a middleware that enforces a static bearer token on every
|
// BearerAuth gates an HTTP handler behind dual-mode authentication.
|
||||||
// request. token must be non-empty; if it is empty, every request is rejected.
|
//
|
||||||
func BearerAuth(token string, next http.Handler) http.Handler {
|
// Auth precedence:
|
||||||
|
//
|
||||||
|
// 1. Static Bearer match (constant-time compare against staticToken).
|
||||||
|
// Wins immediately and never emits a WWW-Authenticate header. This is
|
||||||
|
// the path used by internal Tailscale/LAN CLI callers that supply
|
||||||
|
// `Authorization: Bearer $BRAIN_MCP_TOKEN` via `.mcp.json`. Returning
|
||||||
|
// 200 without a WWW-Authenticate prevents the MCP client from
|
||||||
|
// speculatively flipping into OAuth-discovery mode.
|
||||||
|
// 2. Dex JWT validation (when validator is non-nil). Used by claude.ai
|
||||||
|
// custom MCP connectors that finished the OAuth handshake.
|
||||||
|
// 3. Otherwise 401. When resourceMetadataURL is non-empty, a
|
||||||
|
// `WWW-Authenticate: Bearer resource_metadata="…"` header is emitted
|
||||||
|
// per RFC 9728 §6.2 so claude.ai's OAuth discovery flow can find the
|
||||||
|
// server's protected-resource metadata document.
|
||||||
|
//
|
||||||
|
// The order matters: a valid static Bearer must short-circuit BEFORE any
|
||||||
|
// JWT path runs, because a non-empty WWW-Authenticate emitted on the
|
||||||
|
// fall-through 401 confuses static-Bearer-only clients into discarding
|
||||||
|
// their header and starting an OAuth handshake instead.
|
||||||
|
func BearerAuth(staticToken string, validator *auth.Validator, resourceMetadataURL string, next http.Handler) http.Handler {
|
||||||
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
if token == "" {
|
rawToken, ok := strings.CutPrefix(r.Header.Get("Authorization"), "Bearer ")
|
||||||
http.Error(w, "unauthorized", http.StatusUnauthorized)
|
if !ok {
|
||||||
return
|
unauthorized(w, resourceMetadataURL)
|
||||||
}
|
|
||||||
got, ok := strings.CutPrefix(r.Header.Get("Authorization"), "Bearer ")
|
|
||||||
if !ok || got != token {
|
|
||||||
http.Error(w, "unauthorized", http.StatusUnauthorized)
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 1. Static Bearer wins first — never emits a challenge.
|
||||||
|
if staticToken != "" && subtle.ConstantTimeCompare([]byte(rawToken), []byte(staticToken)) == 1 {
|
||||||
next.ServeHTTP(w, r)
|
next.ServeHTTP(w, r)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// 2. Then Dex JWT, if configured.
|
||||||
|
if validator != nil {
|
||||||
|
if _, err := validator.Validate(r.Context(), rawToken); err == nil {
|
||||||
|
next.ServeHTTP(w, r)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 3. Reject with an OAuth resource-metadata challenge if configured.
|
||||||
|
unauthorized(w, resourceMetadataURL)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func unauthorized(w http.ResponseWriter, resourceMetadataURL string) {
|
||||||
|
if resourceMetadataURL != "" {
|
||||||
|
w.Header().Set("WWW-Authenticate",
|
||||||
|
`Bearer realm="brain", resource_metadata="`+resourceMetadataURL+`"`)
|
||||||
|
}
|
||||||
|
http.Error(w, "unauthorized", http.StatusUnauthorized)
|
||||||
|
}
|
||||||
|
|||||||
@@ -1,18 +1,34 @@
|
|||||||
package mcp_test
|
package mcp_test
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"context"
|
||||||
|
"crypto/rand"
|
||||||
|
"crypto/rsa"
|
||||||
|
"encoding/json"
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/http/httptest"
|
"net/http/httptest"
|
||||||
"testing"
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/lestrrat-go/jwx/v2/jwa"
|
||||||
|
"github.com/lestrrat-go/jwx/v2/jwk"
|
||||||
|
"github.com/lestrrat-go/jwx/v2/jwt"
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/auth"
|
||||||
"github.com/mathiasbq/hyperguild/ingestion/internal/mcp"
|
"github.com/mathiasbq/hyperguild/ingestion/internal/mcp"
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestBearerAuth_MissingHeader(t *testing.T) {
|
const testResourceMetadataURL = "https://brain-mcp.d-ma.be/.well-known/oauth-protected-resource"
|
||||||
handler := mcp.BearerAuth("secret", http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
|
|
||||||
|
func okHandler() http.Handler {
|
||||||
|
return http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
|
||||||
w.WriteHeader(http.StatusOK)
|
w.WriteHeader(http.StatusOK)
|
||||||
}))
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBearerAuth_MissingHeader(t *testing.T) {
|
||||||
|
handler := mcp.BearerAuth("secret", nil, "", okHandler())
|
||||||
req := httptest.NewRequest(http.MethodPost, "/mcp", nil)
|
req := httptest.NewRequest(http.MethodPost, "/mcp", nil)
|
||||||
rr := httptest.NewRecorder()
|
rr := httptest.NewRecorder()
|
||||||
handler.ServeHTTP(rr, req)
|
handler.ServeHTTP(rr, req)
|
||||||
@@ -20,9 +36,7 @@ func TestBearerAuth_MissingHeader(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestBearerAuth_WrongToken(t *testing.T) {
|
func TestBearerAuth_WrongToken(t *testing.T) {
|
||||||
handler := mcp.BearerAuth("secret", http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
|
handler := mcp.BearerAuth("secret", nil, "", okHandler())
|
||||||
w.WriteHeader(http.StatusOK)
|
|
||||||
}))
|
|
||||||
req := httptest.NewRequest(http.MethodPost, "/mcp", nil)
|
req := httptest.NewRequest(http.MethodPost, "/mcp", nil)
|
||||||
req.Header.Set("Authorization", "Bearer wrong")
|
req.Header.Set("Authorization", "Bearer wrong")
|
||||||
rr := httptest.NewRecorder()
|
rr := httptest.NewRecorder()
|
||||||
@@ -32,7 +46,7 @@ func TestBearerAuth_WrongToken(t *testing.T) {
|
|||||||
|
|
||||||
func TestBearerAuth_CorrectToken(t *testing.T) {
|
func TestBearerAuth_CorrectToken(t *testing.T) {
|
||||||
called := false
|
called := false
|
||||||
handler := mcp.BearerAuth("secret", http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
|
handler := mcp.BearerAuth("secret", nil, "", http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
|
||||||
called = true
|
called = true
|
||||||
w.WriteHeader(http.StatusOK)
|
w.WriteHeader(http.StatusOK)
|
||||||
}))
|
}))
|
||||||
@@ -45,12 +59,144 @@ func TestBearerAuth_CorrectToken(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestBearerAuth_EmptyConfiguredToken(t *testing.T) {
|
func TestBearerAuth_EmptyConfiguredToken(t *testing.T) {
|
||||||
// Server started without a token configured — every request must fail.
|
handler := mcp.BearerAuth("", nil, "", okHandler())
|
||||||
handler := mcp.BearerAuth("", http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
|
|
||||||
w.WriteHeader(http.StatusOK)
|
|
||||||
}))
|
|
||||||
req := httptest.NewRequest(http.MethodPost, "/mcp", nil)
|
req := httptest.NewRequest(http.MethodPost, "/mcp", nil)
|
||||||
rr := httptest.NewRecorder()
|
rr := httptest.NewRecorder()
|
||||||
handler.ServeHTTP(rr, req)
|
handler.ServeHTTP(rr, req)
|
||||||
assert.Equal(t, http.StatusUnauthorized, rr.Code)
|
assert.Equal(t, http.StatusUnauthorized, rr.Code)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Issue #9: a valid static Bearer must never emit a WWW-Authenticate header,
|
||||||
|
// even when a resource-metadata URL is configured. The presence of that
|
||||||
|
// header on a 200 response would flip MCP CLI clients into OAuth-discovery
|
||||||
|
// mode and break static-Bearer auth from `.mcp.json` on Tailscale/LAN.
|
||||||
|
func TestBearerAuth_ValidStaticBearer_NoWWWAuthenticate(t *testing.T) {
|
||||||
|
handler := mcp.BearerAuth("secret", nil, testResourceMetadataURL, okHandler())
|
||||||
|
req := httptest.NewRequest(http.MethodPost, "/mcp", nil)
|
||||||
|
req.Header.Set("Authorization", "Bearer secret")
|
||||||
|
rr := httptest.NewRecorder()
|
||||||
|
handler.ServeHTTP(rr, req)
|
||||||
|
assert.Equal(t, http.StatusOK, rr.Code)
|
||||||
|
assert.Empty(t, rr.Header().Get("WWW-Authenticate"), "static-Bearer 200 must not advertise OAuth")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Issue #9: a 401 with resource-metadata configured must emit a
|
||||||
|
// WWW-Authenticate header so claude.ai discovers the protected-resource
|
||||||
|
// metadata document and continues the OAuth dance.
|
||||||
|
func TestBearerAuth_Unauthorized_EmitsResourceMetadataChallenge(t *testing.T) {
|
||||||
|
handler := mcp.BearerAuth("secret", nil, testResourceMetadataURL, okHandler())
|
||||||
|
req := httptest.NewRequest(http.MethodPost, "/mcp", nil)
|
||||||
|
rr := httptest.NewRecorder()
|
||||||
|
handler.ServeHTTP(rr, req)
|
||||||
|
assert.Equal(t, http.StatusUnauthorized, rr.Code)
|
||||||
|
got := rr.Header().Get("WWW-Authenticate")
|
||||||
|
assert.Contains(t, got, `Bearer realm="brain"`)
|
||||||
|
assert.Contains(t, got, `resource_metadata="`+testResourceMetadataURL+`"`)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Static-Bearer-only deployment: no resource-metadata URL, no challenge
|
||||||
|
// header on 401 — matches pre-#9 behaviour for tests without Dex wired.
|
||||||
|
func TestBearerAuth_Unauthorized_NoChallengeWhenResourceUnset(t *testing.T) {
|
||||||
|
handler := mcp.BearerAuth("secret", nil, "", okHandler())
|
||||||
|
req := httptest.NewRequest(http.MethodPost, "/mcp", nil)
|
||||||
|
rr := httptest.NewRecorder()
|
||||||
|
handler.ServeHTTP(rr, req)
|
||||||
|
assert.Equal(t, http.StatusUnauthorized, rr.Code)
|
||||||
|
assert.Empty(t, rr.Header().Get("WWW-Authenticate"))
|
||||||
|
}
|
||||||
|
|
||||||
|
// JWT auth tests
|
||||||
|
|
||||||
|
func buildOIDCServer(t *testing.T) (*httptest.Server, jwk.Key) {
|
||||||
|
t.Helper()
|
||||||
|
raw, err := rsa.GenerateKey(rand.Reader, 2048)
|
||||||
|
require.NoError(t, err)
|
||||||
|
priv, err := jwk.FromRaw(raw)
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.NoError(t, priv.Set(jwk.KeyIDKey, "k1"))
|
||||||
|
require.NoError(t, priv.Set(jwk.AlgorithmKey, jwa.RS256))
|
||||||
|
pub, err := jwk.PublicKeyOf(priv)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
set := jwk.NewSet()
|
||||||
|
require.NoError(t, set.AddKey(pub))
|
||||||
|
jwksBytes, err := json.Marshal(set)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
muxSrv := http.NewServeMux()
|
||||||
|
var srv *httptest.Server
|
||||||
|
muxSrv.HandleFunc("/.well-known/openid-configuration", func(w http.ResponseWriter, _ *http.Request) {
|
||||||
|
_ = json.NewEncoder(w).Encode(map[string]string{
|
||||||
|
"issuer": srv.URL,
|
||||||
|
"jwks_uri": srv.URL + "/jwks",
|
||||||
|
})
|
||||||
|
})
|
||||||
|
muxSrv.HandleFunc("/jwks", func(w http.ResponseWriter, _ *http.Request) {
|
||||||
|
_, _ = w.Write(jwksBytes)
|
||||||
|
})
|
||||||
|
srv = httptest.NewServer(muxSrv)
|
||||||
|
t.Cleanup(srv.Close)
|
||||||
|
return srv, priv
|
||||||
|
}
|
||||||
|
|
||||||
|
func signJWT(t *testing.T, priv jwk.Key, issuer, audience string, exp time.Time) string {
|
||||||
|
t.Helper()
|
||||||
|
tok, err := jwt.NewBuilder().
|
||||||
|
Issuer(issuer).Audience([]string{audience}).
|
||||||
|
Subject("s").Expiration(exp).
|
||||||
|
Build()
|
||||||
|
require.NoError(t, err)
|
||||||
|
signed, err := jwt.Sign(tok, jwt.WithKey(jwa.RS256, priv))
|
||||||
|
require.NoError(t, err)
|
||||||
|
return string(signed)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBearerAuth_ValidJWT(t *testing.T) {
|
||||||
|
oidcSrv, priv := buildOIDCServer(t)
|
||||||
|
v, err := auth.NewValidator(oidcSrv.URL, "brain")
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
called := false
|
||||||
|
handler := mcp.BearerAuth("static-secret", v, "", http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
|
||||||
|
called = true
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
}))
|
||||||
|
|
||||||
|
token := signJWT(t, priv, oidcSrv.URL, "brain", time.Now().Add(time.Hour))
|
||||||
|
req := httptest.NewRequest(http.MethodPost, "/mcp", nil)
|
||||||
|
req.Header.Set("Authorization", "Bearer "+token)
|
||||||
|
rr := httptest.NewRecorder()
|
||||||
|
handler.ServeHTTP(rr, req)
|
||||||
|
assert.Equal(t, http.StatusOK, rr.Code)
|
||||||
|
assert.True(t, called)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBearerAuth_InvalidJWT_FallsBackToStaticToken(t *testing.T) {
|
||||||
|
oidcSrv, _ := buildOIDCServer(t)
|
||||||
|
v, err := auth.NewValidator(oidcSrv.URL, "brain")
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
handler := mcp.BearerAuth("static-secret", v, "", okHandler())
|
||||||
|
req := httptest.NewRequest(http.MethodPost, "/mcp", nil)
|
||||||
|
req.Header.Set("Authorization", "Bearer static-secret")
|
||||||
|
rr := httptest.NewRecorder()
|
||||||
|
handler.ServeHTTP(rr, req)
|
||||||
|
assert.Equal(t, http.StatusOK, rr.Code)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBearerAuth_InvalidJWT_WrongStaticToken(t *testing.T) {
|
||||||
|
oidcSrv, priv := buildOIDCServer(t)
|
||||||
|
v, err := auth.NewValidator(oidcSrv.URL, "brain")
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
handler := mcp.BearerAuth("static-secret", v, "", okHandler())
|
||||||
|
// Expired JWT — JWT fails, static token doesn't match either
|
||||||
|
token := signJWT(t, priv, oidcSrv.URL, "brain", time.Now().Add(-time.Hour))
|
||||||
|
req := httptest.NewRequest(http.MethodPost, "/mcp", nil)
|
||||||
|
req.Header.Set("Authorization", "Bearer "+token)
|
||||||
|
|
||||||
|
_ = context.Background() // satisfies import
|
||||||
|
rr := httptest.NewRecorder()
|
||||||
|
handler.ServeHTTP(rr, req)
|
||||||
|
assert.Equal(t, http.StatusUnauthorized, rr.Code)
|
||||||
|
}
|
||||||
|
|||||||
@@ -4,11 +4,13 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"log/slog"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/mathiasbq/hyperguild/ingestion/internal/api"
|
"github.com/mathiasbq/hyperguild/ingestion/internal/api"
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/brain"
|
||||||
"github.com/mathiasbq/hyperguild/ingestion/internal/extract"
|
"github.com/mathiasbq/hyperguild/ingestion/internal/extract"
|
||||||
"github.com/mathiasbq/hyperguild/ingestion/internal/pipeline"
|
"github.com/mathiasbq/hyperguild/ingestion/internal/pipeline"
|
||||||
"github.com/mathiasbq/hyperguild/ingestion/internal/search"
|
"github.com/mathiasbq/hyperguild/ingestion/internal/search"
|
||||||
@@ -24,6 +26,10 @@ func (s *Server) tools() []map[string]any {
|
|||||||
int_ := func(desc string) map[string]any {
|
int_ := func(desc string) map[string]any {
|
||||||
return map[string]any{"type": "integer", "description": desc}
|
return map[string]any{"type": "integer", "description": desc}
|
||||||
}
|
}
|
||||||
|
enum := func(desc string, vals ...string) map[string]any {
|
||||||
|
return map[string]any{"type": "string", "description": desc, "enum": vals}
|
||||||
|
}
|
||||||
|
halls := []string{"facts", "decisions", "failures", "hypotheses", "sources"}
|
||||||
schema := func(required []string, props map[string]any) json.RawMessage {
|
schema := func(required []string, props map[string]any) json.RawMessage {
|
||||||
b, _ := json.Marshal(map[string]any{
|
b, _ := json.Marshal(map[string]any{
|
||||||
"type": "object", "required": required, "properties": props,
|
"type": "object", "required": required, "properties": props,
|
||||||
@@ -34,20 +40,39 @@ func (s *Server) tools() []map[string]any {
|
|||||||
return []map[string]any{
|
return []map[string]any{
|
||||||
{
|
{
|
||||||
"name": "brain_query",
|
"name": "brain_query",
|
||||||
"description": "BM25 full-text search across brain/knowledge/ and brain/wiki/ markdown files.",
|
"description": "BM25 full-text search across brain/knowledge/ and brain/wiki/ markdown files. Optionally scope by wing (topic domain) and hall (memory type).",
|
||||||
"inputSchema": schema([]string{"query"}, map[string]any{
|
"inputSchema": schema([]string{"query"}, map[string]any{
|
||||||
"query": str("search terms"),
|
"query": str("search terms"),
|
||||||
"limit": int_("max results, default 5"),
|
"limit": int_("max results, default 5"),
|
||||||
|
"wing": str("optional wing to scope to, e.g. jepa-fx"),
|
||||||
|
"hall": enum("optional hall to scope to (requires wing)", halls...),
|
||||||
}),
|
}),
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "brain_write",
|
"name": "brain_write",
|
||||||
"description": "Write a raw knowledge note to brain/knowledge/.",
|
"description": "Write a markdown note to the brain. With wing+hall set, routes to brain/wiki/<wing>/<hall>/ with wing/hall/created_at frontmatter; otherwise writes to brain/knowledge/ (legacy).",
|
||||||
"inputSchema": schema([]string{"content"}, map[string]any{
|
"inputSchema": schema([]string{"content"}, map[string]any{
|
||||||
"content": str("markdown content"),
|
"content": str("markdown content"),
|
||||||
"filename": str("optional filename"),
|
"filename": str("optional filename or slug"),
|
||||||
"type": str("optional frontmatter type"),
|
"type": str("optional frontmatter type (legacy)"),
|
||||||
"domain": str("optional frontmatter domain"),
|
"domain": str("optional frontmatter domain (legacy)"),
|
||||||
|
"wing": str("optional topic domain, e.g. jepa-fx"),
|
||||||
|
"hall": enum("optional memory type (requires wing)", halls...),
|
||||||
|
}),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "brain_tunnel",
|
||||||
|
"description": "Create an explicit bidirectional [[wikilink]] between two notes in different wings. Idempotent.",
|
||||||
|
"inputSchema": schema([]string{"source", "target"}, map[string]any{
|
||||||
|
"source": str("path of source note relative to brain dir, e.g. wiki/jepa-fx/decisions/val-vol.md"),
|
||||||
|
"target": str("path of target note (must be in a different wing)"),
|
||||||
|
}),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "brain_index",
|
||||||
|
"description": "Regenerate _index.md (Map of Content) for one or all wings under brain/wiki/. Auto-called after brain_write with wing+hall.",
|
||||||
|
"inputSchema": schema([]string{}, map[string]any{
|
||||||
|
"wing": str("optional wing to index; if absent, rebuilds every wing"),
|
||||||
}),
|
}),
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -69,6 +94,20 @@ func (s *Server) tools() []map[string]any {
|
|||||||
"dry_run": map[string]any{"type": "boolean"},
|
"dry_run": map[string]any{"type": "boolean"},
|
||||||
}),
|
}),
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"name": "brain_answer",
|
||||||
|
"description": "Retrieve relevant brain content via BM25 and synthesize a coherent answer using an LLM.",
|
||||||
|
"inputSchema": schema([]string{"query"}, map[string]any{
|
||||||
|
"query": str("question to answer"),
|
||||||
|
}),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "brain_classify",
|
||||||
|
"description": "Classify raw text into doc type, title, and tags using an LLM.",
|
||||||
|
"inputSchema": schema([]string{"text"}, map[string]any{
|
||||||
|
"text": str("raw document text to classify (first 3000 chars used)"),
|
||||||
|
}),
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"name": "session_log",
|
"name": "session_log",
|
||||||
"description": "Append a structured entry to brain/sessions/<session_id>.jsonl.",
|
"description": "Append a structured entry to brain/sessions/<session_id>.jsonl.",
|
||||||
@@ -90,6 +129,8 @@ func (s *Server) tools() []map[string]any {
|
|||||||
type brainQueryArgs struct {
|
type brainQueryArgs struct {
|
||||||
Query string `json:"query"`
|
Query string `json:"query"`
|
||||||
Limit int `json:"limit,omitempty"`
|
Limit int `json:"limit,omitempty"`
|
||||||
|
Wing string `json:"wing,omitempty"`
|
||||||
|
Hall string `json:"hall,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *Server) brainQuery(ctx context.Context, args json.RawMessage) (json.RawMessage, error) {
|
func (s *Server) brainQuery(ctx context.Context, args json.RawMessage) (json.RawMessage, error) {
|
||||||
@@ -103,7 +144,14 @@ func (s *Server) brainQuery(ctx context.Context, args json.RawMessage) (json.Raw
|
|||||||
if a.Limit == 0 {
|
if a.Limit == 0 {
|
||||||
a.Limit = 5
|
a.Limit = 5
|
||||||
}
|
}
|
||||||
results, err := search.Query(s.brainDir, a.Query, a.Limit)
|
results, err := search.QueryContext(ctx, s.brainDir, search.QueryOptions{
|
||||||
|
Query: a.Query,
|
||||||
|
Limit: a.Limit,
|
||||||
|
Wing: a.Wing,
|
||||||
|
Hall: a.Hall,
|
||||||
|
Vector: s.vector,
|
||||||
|
Embedder: s.embedder,
|
||||||
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("search: %w", err)
|
return nil, fmt.Errorf("search: %w", err)
|
||||||
}
|
}
|
||||||
@@ -115,6 +163,8 @@ type brainWriteArgs struct {
|
|||||||
Filename string `json:"filename,omitempty"`
|
Filename string `json:"filename,omitempty"`
|
||||||
Type string `json:"type,omitempty"`
|
Type string `json:"type,omitempty"`
|
||||||
Domain string `json:"domain,omitempty"`
|
Domain string `json:"domain,omitempty"`
|
||||||
|
Wing string `json:"wing,omitempty"`
|
||||||
|
Hall string `json:"hall,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *Server) brainWrite(ctx context.Context, args json.RawMessage) (json.RawMessage, error) {
|
func (s *Server) brainWrite(ctx context.Context, args json.RawMessage) (json.RawMessage, error) {
|
||||||
@@ -122,13 +172,73 @@ func (s *Server) brainWrite(ctx context.Context, args json.RawMessage) (json.Raw
|
|||||||
if err := json.Unmarshal(args, &a); err != nil {
|
if err := json.Unmarshal(args, &a); err != nil {
|
||||||
return nil, fmt.Errorf("parse args: %w", err)
|
return nil, fmt.Errorf("parse args: %w", err)
|
||||||
}
|
}
|
||||||
relPath, err := api.WriteNote(s.brainDir, a.Content, a.Filename, a.Type, a.Domain)
|
relPath, err := api.WriteNote(s.brainDir, api.WriteNoteOptions{
|
||||||
|
Content: a.Content,
|
||||||
|
Filename: a.Filename,
|
||||||
|
Type: a.Type,
|
||||||
|
Domain: a.Domain,
|
||||||
|
Wing: a.Wing,
|
||||||
|
Hall: a.Hall,
|
||||||
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
// Auto-regenerate the wing _index.md when the write landed in the
|
||||||
|
// structured wiki, and auto-tunnel cross-wing matches. Both are
|
||||||
|
// best-effort: the note is already written.
|
||||||
|
if a.Wing != "" && a.Hall != "" {
|
||||||
|
if err := brain.BuildWingIndex(s.brainDir, a.Wing); err != nil {
|
||||||
|
slog.Warn("brain_write: auto-index failed", "wing", a.Wing, "err", err)
|
||||||
|
}
|
||||||
|
if err := brain.AutoTunnel(s.brainDir, relPath, a.Content); err != nil {
|
||||||
|
slog.Warn("brain_write: auto-tunnel failed", "src", relPath, "err", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
return json.Marshal(map[string]string{"path": relPath})
|
return json.Marshal(map[string]string{"path": relPath})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type brainTunnelArgs struct {
|
||||||
|
Source string `json:"source"`
|
||||||
|
Target string `json:"target"`
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Server) brainTunnel(ctx context.Context, args json.RawMessage) (json.RawMessage, error) {
|
||||||
|
var a brainTunnelArgs
|
||||||
|
if err := json.Unmarshal(args, &a); err != nil {
|
||||||
|
return nil, fmt.Errorf("parse args: %w", err)
|
||||||
|
}
|
||||||
|
if a.Source == "" || a.Target == "" {
|
||||||
|
return nil, fmt.Errorf("source and target are required")
|
||||||
|
}
|
||||||
|
if err := brain.WriteTunnel(s.brainDir, a.Source, a.Target); err != nil {
|
||||||
|
return nil, fmt.Errorf("tunnel: %w", err)
|
||||||
|
}
|
||||||
|
return json.Marshal(map[string]string{"status": "ok"})
|
||||||
|
}
|
||||||
|
|
||||||
|
type brainIndexArgs struct {
|
||||||
|
Wing string `json:"wing,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Server) brainIndex(ctx context.Context, args json.RawMessage) (json.RawMessage, error) {
|
||||||
|
var a brainIndexArgs
|
||||||
|
if len(args) > 0 {
|
||||||
|
if err := json.Unmarshal(args, &a); err != nil {
|
||||||
|
return nil, fmt.Errorf("parse args: %w", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if a.Wing == "" {
|
||||||
|
if err := brain.BuildAllWingIndexes(s.brainDir); err != nil {
|
||||||
|
return nil, fmt.Errorf("index: %w", err)
|
||||||
|
}
|
||||||
|
return json.Marshal(map[string]any{"status": "ok", "scope": "all"})
|
||||||
|
}
|
||||||
|
if err := brain.BuildWingIndex(s.brainDir, a.Wing); err != nil {
|
||||||
|
return nil, fmt.Errorf("index: %w", err)
|
||||||
|
}
|
||||||
|
return json.Marshal(map[string]any{"status": "ok", "scope": a.Wing})
|
||||||
|
}
|
||||||
|
|
||||||
type brainIngestRawArgs struct {
|
type brainIngestRawArgs struct {
|
||||||
Source string `json:"source"`
|
Source string `json:"source"`
|
||||||
Pages []pipeline.RawPage `json:"pages"`
|
Pages []pipeline.RawPage `json:"pages"`
|
||||||
|
|||||||
@@ -40,7 +40,7 @@ func TestBrainQueryReturnsResults(t *testing.T) {
|
|||||||
0o644,
|
0o644,
|
||||||
))
|
))
|
||||||
|
|
||||||
srv := mcp.NewServer(brainDir, nil, nil)
|
srv := mcp.NewServer(brainDir, nil, nil, nil)
|
||||||
resp := toolCall(t, srv, "brain_query", map[string]any{"query": "tdd"})
|
resp := toolCall(t, srv, "brain_query", map[string]any{"query": "tdd"})
|
||||||
|
|
||||||
require.Nil(t, resp["error"])
|
require.Nil(t, resp["error"])
|
||||||
@@ -53,7 +53,7 @@ func TestBrainQueryReturnsResults(t *testing.T) {
|
|||||||
|
|
||||||
func TestBrainWriteCreatesFile(t *testing.T) {
|
func TestBrainWriteCreatesFile(t *testing.T) {
|
||||||
brainDir := t.TempDir()
|
brainDir := t.TempDir()
|
||||||
srv := mcp.NewServer(brainDir, nil, nil)
|
srv := mcp.NewServer(brainDir, nil, nil, nil)
|
||||||
|
|
||||||
resp := toolCall(t, srv, "brain_write", map[string]any{
|
resp := toolCall(t, srv, "brain_write", map[string]any{
|
||||||
"content": "# Test\n\nbody",
|
"content": "# Test\n\nbody",
|
||||||
@@ -70,9 +70,147 @@ func TestBrainWriteCreatesFile(t *testing.T) {
|
|||||||
assert.Contains(t, string(got), "# Test")
|
assert.Contains(t, string(got), "# Test")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestBrainWriteWingHallRoutesToWiki(t *testing.T) {
|
||||||
|
brainDir := t.TempDir()
|
||||||
|
srv := mcp.NewServer(brainDir, nil, nil, nil)
|
||||||
|
|
||||||
|
resp := toolCall(t, srv, "brain_write", map[string]any{
|
||||||
|
"content": "# Val Vol\n\nbody",
|
||||||
|
"filename": "val-vol-r2",
|
||||||
|
"wing": "jepa-fx",
|
||||||
|
"hall": "decisions",
|
||||||
|
})
|
||||||
|
require.Nil(t, resp["error"])
|
||||||
|
|
||||||
|
got, err := os.ReadFile(filepath.Join(brainDir, "wiki", "jepa-fx", "decisions", "val-vol-r2.md"))
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Contains(t, string(got), "wing: jepa-fx")
|
||||||
|
assert.Contains(t, string(got), "hall: decisions")
|
||||||
|
assert.Contains(t, string(got), "created_at:")
|
||||||
|
assert.Contains(t, string(got), "# Val Vol")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBrainWriteRejectsInvalidHall(t *testing.T) {
|
||||||
|
brainDir := t.TempDir()
|
||||||
|
srv := mcp.NewServer(brainDir, nil, nil, nil)
|
||||||
|
resp := toolCall(t, srv, "brain_write", map[string]any{
|
||||||
|
"content": "x",
|
||||||
|
"wing": "jepa-fx",
|
||||||
|
"hall": "garbage",
|
||||||
|
})
|
||||||
|
require.NotNil(t, resp["error"])
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBrainQueryWingScope(t *testing.T) {
|
||||||
|
brainDir := t.TempDir()
|
||||||
|
for _, p := range []struct{ rel, body string }{
|
||||||
|
{"wiki/jepa-fx/facts/x.md", "---\nwing: jepa-fx\nhall: facts\n---\nfoo keyword.\n"},
|
||||||
|
{"wiki/other/facts/y.md", "---\nwing: other\nhall: facts\n---\nfoo keyword.\n"},
|
||||||
|
} {
|
||||||
|
full := filepath.Join(brainDir, p.rel)
|
||||||
|
require.NoError(t, os.MkdirAll(filepath.Dir(full), 0o755))
|
||||||
|
require.NoError(t, os.WriteFile(full, []byte(p.body), 0o644))
|
||||||
|
}
|
||||||
|
srv := mcp.NewServer(brainDir, nil, nil, nil)
|
||||||
|
resp := toolCall(t, srv, "brain_query", map[string]any{
|
||||||
|
"query": "foo",
|
||||||
|
"wing": "jepa-fx",
|
||||||
|
})
|
||||||
|
require.Nil(t, resp["error"])
|
||||||
|
text := resp["result"].(map[string]any)["content"].([]any)[0].(map[string]any)["text"].(string)
|
||||||
|
assert.Contains(t, text, "wiki/jepa-fx/facts/x.md")
|
||||||
|
assert.NotContains(t, text, "wiki/other/facts/y.md")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBrainWriteAutoTunnelsOnExactMatch(t *testing.T) {
|
||||||
|
brainDir := t.TempDir()
|
||||||
|
// Seed a pre-existing note in wing "other".
|
||||||
|
existing := filepath.Join(brainDir, "wiki/other/facts/widget.md")
|
||||||
|
require.NoError(t, os.MkdirAll(filepath.Dir(existing), 0o755))
|
||||||
|
require.NoError(t, os.WriteFile(existing,
|
||||||
|
[]byte("---\nwing: other\nhall: facts\ntitle: Widget\n---\nbody.\n"), 0o644))
|
||||||
|
|
||||||
|
srv := mcp.NewServer(brainDir, nil, nil, nil)
|
||||||
|
|
||||||
|
// Write a new note in a *different* wing whose content references "Widget".
|
||||||
|
resp := toolCall(t, srv, "brain_write", map[string]any{
|
||||||
|
"content": "# Notes\n\nThis note discusses the Widget concept.\n",
|
||||||
|
"filename": "notes",
|
||||||
|
"wing": "jepa-fx",
|
||||||
|
"hall": "facts",
|
||||||
|
})
|
||||||
|
require.Nil(t, resp["error"])
|
||||||
|
|
||||||
|
newNote := filepath.Join(brainDir, "wiki/jepa-fx/facts/notes.md")
|
||||||
|
got, err := os.ReadFile(newNote)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Contains(t, string(got), "[[other/facts/widget]]", "new note should link to existing")
|
||||||
|
|
||||||
|
gotTgt, err := os.ReadFile(existing)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Contains(t, string(gotTgt), "[[jepa-fx/facts/notes]]", "existing note should backlink")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBrainWriteAutoTunnelSkipsSameWing(t *testing.T) {
|
||||||
|
brainDir := t.TempDir()
|
||||||
|
existing := filepath.Join(brainDir, "wiki/jepa-fx/facts/widget.md")
|
||||||
|
require.NoError(t, os.MkdirAll(filepath.Dir(existing), 0o755))
|
||||||
|
require.NoError(t, os.WriteFile(existing,
|
||||||
|
[]byte("---\nwing: jepa-fx\nhall: facts\ntitle: Widget\n---\nbody.\n"), 0o644))
|
||||||
|
|
||||||
|
srv := mcp.NewServer(brainDir, nil, nil, nil)
|
||||||
|
resp := toolCall(t, srv, "brain_write", map[string]any{
|
||||||
|
"content": "Same wing reference to Widget here.\n",
|
||||||
|
"filename": "notes",
|
||||||
|
"wing": "jepa-fx",
|
||||||
|
"hall": "facts",
|
||||||
|
})
|
||||||
|
require.Nil(t, resp["error"])
|
||||||
|
|
||||||
|
newNote := filepath.Join(brainDir, "wiki/jepa-fx/facts/notes.md")
|
||||||
|
got, err := os.ReadFile(newNote)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.NotContains(t, string(got), "[[jepa-fx/facts/widget]]", "same-wing match must not auto-tunnel")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBrainTunnelLinksTwoNotes(t *testing.T) {
|
||||||
|
brainDir := t.TempDir()
|
||||||
|
for _, p := range []struct{ rel, body string }{
|
||||||
|
{"wiki/jepa-fx/decisions/val-vol.md", "---\nwing: jepa-fx\nhall: decisions\n---\n# Val Vol\n"},
|
||||||
|
{"wiki/hyperguild/decisions/routing.md", "---\nwing: hyperguild\nhall: decisions\n---\n# Routing\n"},
|
||||||
|
} {
|
||||||
|
full := filepath.Join(brainDir, p.rel)
|
||||||
|
require.NoError(t, os.MkdirAll(filepath.Dir(full), 0o755))
|
||||||
|
require.NoError(t, os.WriteFile(full, []byte(p.body), 0o644))
|
||||||
|
}
|
||||||
|
srv := mcp.NewServer(brainDir, nil, nil, nil)
|
||||||
|
resp := toolCall(t, srv, "brain_tunnel", map[string]any{
|
||||||
|
"source": "wiki/jepa-fx/decisions/val-vol.md",
|
||||||
|
"target": "wiki/hyperguild/decisions/routing.md",
|
||||||
|
})
|
||||||
|
require.Nil(t, resp["error"])
|
||||||
|
|
||||||
|
src, err := os.ReadFile(filepath.Join(brainDir, "wiki/jepa-fx/decisions/val-vol.md"))
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Contains(t, string(src), "[[hyperguild/decisions/routing]]")
|
||||||
|
tgt, err := os.ReadFile(filepath.Join(brainDir, "wiki/hyperguild/decisions/routing.md"))
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Contains(t, string(tgt), "[[jepa-fx/decisions/val-vol]]")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBrainTunnelRejectsMissing(t *testing.T) {
|
||||||
|
brainDir := t.TempDir()
|
||||||
|
srv := mcp.NewServer(brainDir, nil, nil, nil)
|
||||||
|
resp := toolCall(t, srv, "brain_tunnel", map[string]any{
|
||||||
|
"source": "wiki/a/facts/ghost.md",
|
||||||
|
"target": "wiki/b/facts/ghost.md",
|
||||||
|
})
|
||||||
|
require.NotNil(t, resp["error"])
|
||||||
|
}
|
||||||
|
|
||||||
func TestBrainWriteRejectsTraversal(t *testing.T) {
|
func TestBrainWriteRejectsTraversal(t *testing.T) {
|
||||||
brainDir := t.TempDir()
|
brainDir := t.TempDir()
|
||||||
srv := mcp.NewServer(brainDir, nil, nil)
|
srv := mcp.NewServer(brainDir, nil, nil, nil)
|
||||||
|
|
||||||
resp := toolCall(t, srv, "brain_write", map[string]any{
|
resp := toolCall(t, srv, "brain_write", map[string]any{
|
||||||
"content": "x",
|
"content": "x",
|
||||||
@@ -83,7 +221,7 @@ func TestBrainWriteRejectsTraversal(t *testing.T) {
|
|||||||
|
|
||||||
func TestBrainWriteAcceptsDoubleDotInName(t *testing.T) {
|
func TestBrainWriteAcceptsDoubleDotInName(t *testing.T) {
|
||||||
brainDir := t.TempDir()
|
brainDir := t.TempDir()
|
||||||
srv := mcp.NewServer(brainDir, nil, nil)
|
srv := mcp.NewServer(brainDir, nil, nil, nil)
|
||||||
|
|
||||||
resp := toolCall(t, srv, "brain_write", map[string]any{
|
resp := toolCall(t, srv, "brain_write", map[string]any{
|
||||||
"content": "x",
|
"content": "x",
|
||||||
@@ -98,7 +236,7 @@ func TestBrainWriteAcceptsDoubleDotInName(t *testing.T) {
|
|||||||
func TestBrainIngestRawDryRun(t *testing.T) {
|
func TestBrainIngestRawDryRun(t *testing.T) {
|
||||||
brainDir := t.TempDir()
|
brainDir := t.TempDir()
|
||||||
require.NoError(t, os.MkdirAll(filepath.Join(brainDir, "wiki", "concepts"), 0o755))
|
require.NoError(t, os.MkdirAll(filepath.Join(brainDir, "wiki", "concepts"), 0o755))
|
||||||
srv := mcp.NewServer(brainDir, nil, nil)
|
srv := mcp.NewServer(brainDir, nil, nil, nil)
|
||||||
|
|
||||||
resp := toolCall(t, srv, "brain_ingest_raw", map[string]any{
|
resp := toolCall(t, srv, "brain_ingest_raw", map[string]any{
|
||||||
"source": "test-source",
|
"source": "test-source",
|
||||||
@@ -130,7 +268,7 @@ func TestBrainIngestRawDryRun(t *testing.T) {
|
|||||||
|
|
||||||
func TestBrainIngestRejectsBoth(t *testing.T) {
|
func TestBrainIngestRejectsBoth(t *testing.T) {
|
||||||
brainDir := t.TempDir()
|
brainDir := t.TempDir()
|
||||||
srv := mcp.NewServer(brainDir, nil, nil)
|
srv := mcp.NewServer(brainDir, nil, nil, nil)
|
||||||
|
|
||||||
resp := toolCall(t, srv, "brain_ingest", map[string]any{
|
resp := toolCall(t, srv, "brain_ingest", map[string]any{
|
||||||
"content": "x",
|
"content": "x",
|
||||||
@@ -142,7 +280,7 @@ func TestBrainIngestRejectsBoth(t *testing.T) {
|
|||||||
|
|
||||||
func TestBrainIngestRequiresOne(t *testing.T) {
|
func TestBrainIngestRequiresOne(t *testing.T) {
|
||||||
brainDir := t.TempDir()
|
brainDir := t.TempDir()
|
||||||
srv := mcp.NewServer(brainDir, nil, nil)
|
srv := mcp.NewServer(brainDir, nil, nil, nil)
|
||||||
|
|
||||||
resp := toolCall(t, srv, "brain_ingest", map[string]any{})
|
resp := toolCall(t, srv, "brain_ingest", map[string]any{})
|
||||||
require.NotNil(t, resp["error"])
|
require.NotNil(t, resp["error"])
|
||||||
@@ -150,7 +288,7 @@ func TestBrainIngestRequiresOne(t *testing.T) {
|
|||||||
|
|
||||||
func TestBrainIngestRejectsContentWithoutSource(t *testing.T) {
|
func TestBrainIngestRejectsContentWithoutSource(t *testing.T) {
|
||||||
brainDir := t.TempDir()
|
brainDir := t.TempDir()
|
||||||
srv := mcp.NewServer(brainDir, nil, nil)
|
srv := mcp.NewServer(brainDir, nil, nil, nil)
|
||||||
|
|
||||||
resp := toolCall(t, srv, "brain_ingest", map[string]any{
|
resp := toolCall(t, srv, "brain_ingest", map[string]any{
|
||||||
"content": "x",
|
"content": "x",
|
||||||
@@ -160,7 +298,7 @@ func TestBrainIngestRejectsContentWithoutSource(t *testing.T) {
|
|||||||
|
|
||||||
func TestBrainIngestRequiresLLMConfigured(t *testing.T) {
|
func TestBrainIngestRequiresLLMConfigured(t *testing.T) {
|
||||||
brainDir := t.TempDir()
|
brainDir := t.TempDir()
|
||||||
srv := mcp.NewServer(brainDir, nil, nil) // nil pipelineCfg → no LLM
|
srv := mcp.NewServer(brainDir, nil, nil, nil) // nil pipelineCfg → no LLM
|
||||||
|
|
||||||
resp := toolCall(t, srv, "brain_ingest", map[string]any{
|
resp := toolCall(t, srv, "brain_ingest", map[string]any{
|
||||||
"content": "some content",
|
"content": "some content",
|
||||||
@@ -173,7 +311,7 @@ func TestBrainIngestRequiresLLMConfigured(t *testing.T) {
|
|||||||
|
|
||||||
func TestSessionLogAppends(t *testing.T) {
|
func TestSessionLogAppends(t *testing.T) {
|
||||||
brainDir := t.TempDir()
|
brainDir := t.TempDir()
|
||||||
srv := mcp.NewServer(brainDir, nil, nil)
|
srv := mcp.NewServer(brainDir, nil, nil, nil)
|
||||||
|
|
||||||
resp := toolCall(t, srv, "session_log", map[string]any{
|
resp := toolCall(t, srv, "session_log", map[string]any{
|
||||||
"session_id": "session-x",
|
"session_id": "session-x",
|
||||||
@@ -190,7 +328,7 @@ func TestSessionLogAppends(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestSessionLogRequiresSessionID(t *testing.T) {
|
func TestSessionLogRequiresSessionID(t *testing.T) {
|
||||||
srv := mcp.NewServer(t.TempDir(), nil, nil)
|
srv := mcp.NewServer(t.TempDir(), nil, nil, nil)
|
||||||
resp := toolCall(t, srv, "session_log", map[string]any{"skill": "tdd"})
|
resp := toolCall(t, srv, "session_log", map[string]any{"skill": "tdd"})
|
||||||
require.NotNil(t, resp["error"])
|
require.NotNil(t, resp["error"])
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -14,7 +14,7 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
func TestMCPMountedHandler(t *testing.T) {
|
func TestMCPMountedHandler(t *testing.T) {
|
||||||
srv := mcp.NewServer(t.TempDir(), nil, nil)
|
srv := mcp.NewServer(t.TempDir(), nil, nil, nil)
|
||||||
mux := http.NewServeMux()
|
mux := http.NewServeMux()
|
||||||
mux.Handle("POST /mcp", srv)
|
mux.Handle("POST /mcp", srv)
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
// Package mcp implements an MCP HTTP handler for the ingestion service.
|
// Package mcp implements an MCP HTTP handler for the ingestion service.
|
||||||
// Exposed tools: brain_query, brain_write, brain_ingest, brain_ingest_raw, session_log.
|
// Exposed tools: brain_query, brain_write, brain_index, brain_tunnel,
|
||||||
|
// brain_ingest, brain_ingest_raw, brain_answer, brain_classify, session_log.
|
||||||
package mcp
|
package mcp
|
||||||
|
|
||||||
import (
|
import (
|
||||||
@@ -9,6 +10,8 @@ import (
|
|||||||
"net/http"
|
"net/http"
|
||||||
|
|
||||||
"github.com/mathiasbq/hyperguild/ingestion/internal/pipeline"
|
"github.com/mathiasbq/hyperguild/ingestion/internal/pipeline"
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/reranker"
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/search"
|
||||||
)
|
)
|
||||||
|
|
||||||
type request struct {
|
type request struct {
|
||||||
@@ -35,16 +38,39 @@ type Server struct {
|
|||||||
brainDir string
|
brainDir string
|
||||||
pipeline pipeline.Config
|
pipeline pipeline.Config
|
||||||
llm pipeline.CompleteFunc
|
llm pipeline.CompleteFunc
|
||||||
|
answerLLM pipeline.CompleteFunc // nil = brain_answer and brain_classify unavailable
|
||||||
|
reranker *reranker.Client // nil = no rerank, BM25 top-10 → LLM
|
||||||
|
vector search.VectorSearcher // nil = BM25-only retrieval
|
||||||
|
embedder search.Embedder // nil = BM25-only retrieval
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewServer constructs a Server bound to brainDir. pipelineCfg supplies the
|
// NewServer constructs a Server bound to brainDir. pipelineCfg supplies the
|
||||||
// LLM-backed pipeline; llm may be nil for non-LLM tools only.
|
// LLM-backed pipeline; llm may be nil for non-LLM tools only.
|
||||||
func NewServer(brainDir string, pipelineCfg *pipeline.Config, llm pipeline.CompleteFunc) *Server {
|
// answerLLM drives brain_answer and brain_classify; nil disables those tools.
|
||||||
|
func NewServer(brainDir string, pipelineCfg *pipeline.Config, llm pipeline.CompleteFunc, answerLLM pipeline.CompleteFunc) *Server {
|
||||||
cfg := pipeline.Config{}
|
cfg := pipeline.Config{}
|
||||||
if pipelineCfg != nil {
|
if pipelineCfg != nil {
|
||||||
cfg = *pipelineCfg
|
cfg = *pipelineCfg
|
||||||
}
|
}
|
||||||
return &Server{brainDir: brainDir, pipeline: cfg, llm: llm}
|
return &Server{brainDir: brainDir, pipeline: cfg, llm: llm, answerLLM: answerLLM}
|
||||||
|
}
|
||||||
|
|
||||||
|
// WithReranker installs an opt-in cross-encoder reranker. When set,
|
||||||
|
// brain_answer retrieves a wider BM25 candidate set and prunes it to
|
||||||
|
// the relevant ones before LLM synthesis. Returns the server for
|
||||||
|
// fluent chaining.
|
||||||
|
func (s *Server) WithReranker(r *reranker.Client) *Server {
|
||||||
|
s.reranker = r
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
|
||||||
|
// WithHybridRetrieval wires the embedding store and embedder so
|
||||||
|
// brain_query and brain_answer run BM25 + pgvector merged via RRF
|
||||||
|
// instead of BM25 alone. Either nil disables hybrid mode.
|
||||||
|
func (s *Server) WithHybridRetrieval(v search.VectorSearcher, e search.Embedder) *Server {
|
||||||
|
s.vector = v
|
||||||
|
s.embedder = e
|
||||||
|
return s
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *Server) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
func (s *Server) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
||||||
@@ -134,12 +160,20 @@ func (s *Server) handleCall(ctx context.Context, name string, args json.RawMessa
|
|||||||
return s.brainQuery(ctx, args)
|
return s.brainQuery(ctx, args)
|
||||||
case "brain_write":
|
case "brain_write":
|
||||||
return s.brainWrite(ctx, args)
|
return s.brainWrite(ctx, args)
|
||||||
|
case "brain_index":
|
||||||
|
return s.brainIndex(ctx, args)
|
||||||
|
case "brain_tunnel":
|
||||||
|
return s.brainTunnel(ctx, args)
|
||||||
case "brain_ingest_raw":
|
case "brain_ingest_raw":
|
||||||
return s.brainIngestRaw(ctx, args)
|
return s.brainIngestRaw(ctx, args)
|
||||||
case "brain_ingest":
|
case "brain_ingest":
|
||||||
return s.brainIngest(ctx, args)
|
return s.brainIngest(ctx, args)
|
||||||
case "session_log":
|
case "session_log":
|
||||||
return s.sessionLog(ctx, args)
|
return s.sessionLog(ctx, args)
|
||||||
|
case "brain_answer":
|
||||||
|
return s.brainAnswer(ctx, args)
|
||||||
|
case "brain_classify":
|
||||||
|
return s.brainClassify(ctx, args)
|
||||||
default:
|
default:
|
||||||
return nil, fmt.Errorf("unknown tool: %s", name)
|
return nil, fmt.Errorf("unknown tool: %s", name)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -21,7 +21,7 @@ func body(t *testing.T, v any) *bytes.Buffer {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestServerInitialize(t *testing.T) {
|
func TestServerInitialize(t *testing.T) {
|
||||||
srv := mcp.NewServer(t.TempDir(), nil, nil)
|
srv := mcp.NewServer(t.TempDir(), nil, nil, nil)
|
||||||
|
|
||||||
req := httptest.NewRequest(http.MethodPost, "/mcp", body(t, map[string]any{
|
req := httptest.NewRequest(http.MethodPost, "/mcp", body(t, map[string]any{
|
||||||
"jsonrpc": "2.0", "id": 1, "method": "initialize",
|
"jsonrpc": "2.0", "id": 1, "method": "initialize",
|
||||||
@@ -38,7 +38,7 @@ func TestServerInitialize(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestServerToolsList(t *testing.T) {
|
func TestServerToolsList(t *testing.T) {
|
||||||
srv := mcp.NewServer(t.TempDir(), nil, nil)
|
srv := mcp.NewServer(t.TempDir(), nil, nil, nil)
|
||||||
|
|
||||||
req := httptest.NewRequest(http.MethodPost, "/mcp", body(t, map[string]any{
|
req := httptest.NewRequest(http.MethodPost, "/mcp", body(t, map[string]any{
|
||||||
"jsonrpc": "2.0", "id": 2, "method": "tools/list",
|
"jsonrpc": "2.0", "id": 2, "method": "tools/list",
|
||||||
@@ -55,12 +55,14 @@ func TestServerToolsList(t *testing.T) {
|
|||||||
names = append(names, t.(map[string]any)["name"].(string))
|
names = append(names, t.(map[string]any)["name"].(string))
|
||||||
}
|
}
|
||||||
assert.ElementsMatch(t, []string{
|
assert.ElementsMatch(t, []string{
|
||||||
"brain_query", "brain_write", "brain_ingest_raw", "brain_ingest", "session_log",
|
"brain_query", "brain_write", "brain_index", "brain_tunnel",
|
||||||
|
"brain_ingest_raw", "brain_ingest",
|
||||||
|
"brain_answer", "brain_classify", "session_log",
|
||||||
}, names)
|
}, names)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestServerNotificationGetsNoBody(t *testing.T) {
|
func TestServerNotificationGetsNoBody(t *testing.T) {
|
||||||
srv := mcp.NewServer(t.TempDir(), nil, nil)
|
srv := mcp.NewServer(t.TempDir(), nil, nil, nil)
|
||||||
|
|
||||||
req := httptest.NewRequest(http.MethodPost, "/mcp", body(t, map[string]any{
|
req := httptest.NewRequest(http.MethodPost, "/mcp", body(t, map[string]any{
|
||||||
"jsonrpc": "2.0", "method": "notifications/initialized",
|
"jsonrpc": "2.0", "method": "notifications/initialized",
|
||||||
@@ -73,7 +75,7 @@ func TestServerNotificationGetsNoBody(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestServerUnknownMethodReturnsError(t *testing.T) {
|
func TestServerUnknownMethodReturnsError(t *testing.T) {
|
||||||
srv := mcp.NewServer(t.TempDir(), nil, nil)
|
srv := mcp.NewServer(t.TempDir(), nil, nil, nil)
|
||||||
|
|
||||||
req := httptest.NewRequest(http.MethodPost, "/mcp", body(t, map[string]any{
|
req := httptest.NewRequest(http.MethodPost, "/mcp", body(t, map[string]any{
|
||||||
"jsonrpc": "2.0", "id": 3, "method": "unknown/method",
|
"jsonrpc": "2.0", "id": 3, "method": "unknown/method",
|
||||||
|
|||||||
157
ingestion/internal/mcp/tools_answer.go
Normal file
157
ingestion/internal/mcp/tools_answer.go
Normal file
@@ -0,0 +1,157 @@
|
|||||||
|
package mcp
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/reranker"
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/search"
|
||||||
|
)
|
||||||
|
|
||||||
|
// rerankResults scores each candidate's excerpt against the query and
|
||||||
|
// returns up to top results whose score is positive, preserving the
|
||||||
|
// caller's input order (BM25 rank) within the kept set. The reranker is
|
||||||
|
// a filter: ties are broken by BM25, not by the reranker's binary score.
|
||||||
|
func rerankResults(ctx context.Context, rr *reranker.Client, query string, results []search.Result, top int) ([]search.Result, error) {
|
||||||
|
docs := make([]string, len(results))
|
||||||
|
for i, r := range results {
|
||||||
|
docs[i] = r.Excerpt
|
||||||
|
}
|
||||||
|
scores, err := rr.Score(ctx, query, docs)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
kept := make([]search.Result, 0, top)
|
||||||
|
for i, r := range results {
|
||||||
|
if scores[i] > 0 {
|
||||||
|
kept = append(kept, r)
|
||||||
|
}
|
||||||
|
if len(kept) == top {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return kept, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
const (
|
||||||
|
answerSystemPrompt = `You are a knowledge assistant. Answer the question using ONLY the provided sources.
|
||||||
|
Cite source file paths inline when referencing specific content.
|
||||||
|
If the context does not contain enough information to answer, say so clearly.`
|
||||||
|
|
||||||
|
classifySystemPrompt = `Classify the document. Respond with JSON only, no markdown fences.
|
||||||
|
{"type":"...","title":"...","tags":["..."]}
|
||||||
|
Valid types: spec, plan, decision, note, wiki, log, code, unknown.`
|
||||||
|
)
|
||||||
|
|
||||||
|
type brainAnswerArgs struct {
|
||||||
|
Query string `json:"query"`
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Server) brainAnswer(ctx context.Context, args json.RawMessage) (json.RawMessage, error) {
|
||||||
|
if s.answerLLM == nil {
|
||||||
|
return nil, fmt.Errorf("answer LLM not configured: set BRAIN_LLM_PRIMARY_URL")
|
||||||
|
}
|
||||||
|
var a brainAnswerArgs
|
||||||
|
if err := json.Unmarshal(args, &a); err != nil {
|
||||||
|
return nil, fmt.Errorf("parse args: %w", err)
|
||||||
|
}
|
||||||
|
if a.Query == "" {
|
||||||
|
return nil, fmt.Errorf("query is required")
|
||||||
|
}
|
||||||
|
|
||||||
|
// With reranker disabled: BM25 top-10 straight to the LLM.
|
||||||
|
// With reranker enabled: BM25 top-20 → cross-encoder filter → top-5.
|
||||||
|
bm25Limit := 10
|
||||||
|
if s.reranker != nil {
|
||||||
|
bm25Limit = 20
|
||||||
|
}
|
||||||
|
results, err := search.QueryContext(ctx, s.brainDir, search.QueryOptions{
|
||||||
|
Query: a.Query,
|
||||||
|
Limit: bm25Limit,
|
||||||
|
Vector: s.vector,
|
||||||
|
Embedder: s.embedder,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("search: %w", err)
|
||||||
|
}
|
||||||
|
if s.reranker != nil && len(results) > 0 {
|
||||||
|
results, err = rerankResults(ctx, s.reranker, a.Query, results, 5)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("rerank: %w", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if len(results) == 0 {
|
||||||
|
return json.Marshal(map[string]any{
|
||||||
|
"answer": "No relevant content found in brain.",
|
||||||
|
"sources": []string{},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
var sb strings.Builder
|
||||||
|
sources := make([]string, 0, len(results))
|
||||||
|
for _, r := range results {
|
||||||
|
fmt.Fprintf(&sb, "<source path=%q>\n%s\n</source>\n\n", r.Path, r.Excerpt)
|
||||||
|
sources = append(sources, r.Path)
|
||||||
|
}
|
||||||
|
|
||||||
|
answer, err := s.answerLLM(ctx, answerSystemPrompt, sb.String()+"Question: "+a.Query)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("llm: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return json.Marshal(map[string]any{
|
||||||
|
"answer": answer,
|
||||||
|
"sources": sources,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
type brainClassifyArgs struct {
|
||||||
|
Text string `json:"text"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type classifyResult struct {
|
||||||
|
Type string `json:"type"`
|
||||||
|
Title string `json:"title"`
|
||||||
|
Tags []string `json:"tags"`
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Server) brainClassify(ctx context.Context, args json.RawMessage) (json.RawMessage, error) {
|
||||||
|
if s.answerLLM == nil {
|
||||||
|
return nil, fmt.Errorf("answer LLM not configured: set BRAIN_LLM_PRIMARY_URL")
|
||||||
|
}
|
||||||
|
var a brainClassifyArgs
|
||||||
|
if err := json.Unmarshal(args, &a); err != nil {
|
||||||
|
return nil, fmt.Errorf("parse args: %w", err)
|
||||||
|
}
|
||||||
|
if a.Text == "" {
|
||||||
|
return nil, fmt.Errorf("text is required")
|
||||||
|
}
|
||||||
|
|
||||||
|
text := a.Text
|
||||||
|
if len(text) > 3000 {
|
||||||
|
text = text[:3000]
|
||||||
|
}
|
||||||
|
|
||||||
|
raw, err := s.answerLLM(ctx, classifySystemPrompt, text)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("llm: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Strip markdown fences if model adds them despite the instruction.
|
||||||
|
raw = strings.TrimSpace(raw)
|
||||||
|
raw = strings.TrimPrefix(raw, "```json")
|
||||||
|
raw = strings.TrimPrefix(raw, "```")
|
||||||
|
raw = strings.TrimSuffix(raw, "```")
|
||||||
|
raw = strings.TrimSpace(raw)
|
||||||
|
|
||||||
|
var cr classifyResult
|
||||||
|
if err := json.Unmarshal([]byte(raw), &cr); err != nil {
|
||||||
|
return nil, fmt.Errorf("parse classify response %q: %w", raw, err)
|
||||||
|
}
|
||||||
|
if cr.Tags == nil {
|
||||||
|
cr.Tags = []string{}
|
||||||
|
}
|
||||||
|
return json.Marshal(cr)
|
||||||
|
}
|
||||||
155
ingestion/internal/mcp/tools_answer_test.go
Normal file
155
ingestion/internal/mcp/tools_answer_test.go
Normal file
@@ -0,0 +1,155 @@
|
|||||||
|
package mcp_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/mcp"
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/pipeline"
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/reranker"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
func mockAnswerLLM(response string) pipeline.CompleteFunc {
|
||||||
|
return func(_ context.Context, _, _ string) (string, error) {
|
||||||
|
return response, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func brainDirWithContent(t *testing.T) string {
|
||||||
|
t.Helper()
|
||||||
|
dir := t.TempDir()
|
||||||
|
wikiDir := filepath.Join(dir, "wiki")
|
||||||
|
require.NoError(t, os.MkdirAll(wikiDir, 0o755))
|
||||||
|
require.NoError(t, os.WriteFile(filepath.Join(wikiDir, "test.md"), []byte(
|
||||||
|
"---\ntitle: Pass-rate Logging\ntype: spec\n---\n\nPass-rate logging tracks skill invocations.",
|
||||||
|
), 0o644))
|
||||||
|
return dir
|
||||||
|
}
|
||||||
|
|
||||||
|
func callTool(t *testing.T, ts *httptest.Server, name string, arguments map[string]any) map[string]any {
|
||||||
|
t.Helper()
|
||||||
|
req := map[string]any{
|
||||||
|
"jsonrpc": "2.0", "id": 1, "method": "tools/call",
|
||||||
|
"params": map[string]any{"name": name, "arguments": arguments},
|
||||||
|
}
|
||||||
|
resp, err := http.Post(ts.URL, "application/json", body(t, req))
|
||||||
|
require.NoError(t, err)
|
||||||
|
defer resp.Body.Close() //nolint:errcheck
|
||||||
|
var out map[string]any
|
||||||
|
require.NoError(t, json.NewDecoder(resp.Body).Decode(&out))
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBrainAnswer_RerankerFiltersBeforeLLM(t *testing.T) {
|
||||||
|
brainDir := t.TempDir()
|
||||||
|
wikiDir := filepath.Join(brainDir, "wiki")
|
||||||
|
require.NoError(t, os.MkdirAll(wikiDir, 0o755))
|
||||||
|
// Two notes — both BM25-match the query, but only one is truly relevant.
|
||||||
|
require.NoError(t, os.WriteFile(filepath.Join(wikiDir, "good.md"), []byte(
|
||||||
|
"---\ntitle: Pass-rate Logging\n---\nPass-rate logging tracks skill invocations.",
|
||||||
|
), 0o644))
|
||||||
|
require.NoError(t, os.WriteFile(filepath.Join(wikiDir, "noise.md"), []byte(
|
||||||
|
"---\ntitle: Pass-rate Tangent\n---\nPass-rate appears here too but as a tangent.",
|
||||||
|
), 0o644))
|
||||||
|
|
||||||
|
// Fake Ollama reranker: yes only when prompt contains "tracks skill invocations".
|
||||||
|
rrSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
raw, _ := io.ReadAll(r.Body)
|
||||||
|
yes := strings.Contains(string(raw), "tracks skill invocations")
|
||||||
|
ans := "no"
|
||||||
|
if yes {
|
||||||
|
ans = "yes"
|
||||||
|
}
|
||||||
|
_ = json.NewEncoder(w).Encode(map[string]any{"response": ans, "done": true})
|
||||||
|
}))
|
||||||
|
defer rrSrv.Close()
|
||||||
|
|
||||||
|
// LLM mock captures the rendered sources so we can assert what reached it.
|
||||||
|
var sawSources string
|
||||||
|
llm := func(_ context.Context, _, user string) (string, error) {
|
||||||
|
sawSources = user
|
||||||
|
return "answer text", nil
|
||||||
|
}
|
||||||
|
|
||||||
|
srv := mcp.NewServer(brainDir, nil, nil, llm).
|
||||||
|
WithReranker(reranker.New(rrSrv.URL, "qwen3"))
|
||||||
|
ts := httptest.NewServer(srv)
|
||||||
|
defer ts.Close()
|
||||||
|
|
||||||
|
rpc := callTool(t, ts, "brain_answer", map[string]any{"query": "pass-rate logging"})
|
||||||
|
require.Nil(t, rpc["error"])
|
||||||
|
|
||||||
|
content := rpc["result"].(map[string]any)["content"].([]any)[0].(map[string]any)["text"].(string)
|
||||||
|
var result map[string]any
|
||||||
|
require.NoError(t, json.Unmarshal([]byte(content), &result))
|
||||||
|
sources := result["sources"].([]any)
|
||||||
|
require.Len(t, sources, 1, "reranker should drop noise.md")
|
||||||
|
assert.Equal(t, "wiki/good.md", sources[0])
|
||||||
|
assert.Contains(t, sawSources, "good.md")
|
||||||
|
assert.NotContains(t, sawSources, "noise.md")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBrainAnswer_NoLLM(t *testing.T) {
|
||||||
|
srv := mcp.NewServer(t.TempDir(), nil, nil, nil)
|
||||||
|
ts := httptest.NewServer(srv)
|
||||||
|
defer ts.Close()
|
||||||
|
|
||||||
|
rpc := callTool(t, ts, "brain_answer", map[string]any{"query": "test"})
|
||||||
|
assert.NotNil(t, rpc["error"], "expected error when answerLLM is nil")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBrainAnswer_Synthesizes(t *testing.T) {
|
||||||
|
brainDir := brainDirWithContent(t)
|
||||||
|
srv := mcp.NewServer(brainDir, nil, nil, mockAnswerLLM("Pass-rate logging is described in spec."))
|
||||||
|
ts := httptest.NewServer(srv)
|
||||||
|
defer ts.Close()
|
||||||
|
|
||||||
|
rpc := callTool(t, ts, "brain_answer", map[string]any{"query": "pass-rate logging"})
|
||||||
|
require.Nil(t, rpc["error"])
|
||||||
|
|
||||||
|
content := rpc["result"].(map[string]any)["content"].([]any)[0].(map[string]any)["text"].(string)
|
||||||
|
var result map[string]any
|
||||||
|
require.NoError(t, json.Unmarshal([]byte(content), &result))
|
||||||
|
assert.Equal(t, "Pass-rate logging is described in spec.", result["answer"])
|
||||||
|
assert.NotEmpty(t, result["sources"])
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBrainClassify_ReturnsJSON(t *testing.T) {
|
||||||
|
llmResp := `{"type":"spec","title":"My Spec","tags":["go","mcp"]}`
|
||||||
|
srv := mcp.NewServer(t.TempDir(), nil, nil, mockAnswerLLM(llmResp))
|
||||||
|
ts := httptest.NewServer(srv)
|
||||||
|
defer ts.Close()
|
||||||
|
|
||||||
|
rpc := callTool(t, ts, "brain_classify", map[string]any{"text": "# My Spec\n\nThis is a Go MCP spec."})
|
||||||
|
require.Nil(t, rpc["error"])
|
||||||
|
|
||||||
|
content := rpc["result"].(map[string]any)["content"].([]any)[0].(map[string]any)["text"].(string)
|
||||||
|
var result map[string]any
|
||||||
|
require.NoError(t, json.Unmarshal([]byte(content), &result))
|
||||||
|
assert.Equal(t, "spec", result["type"])
|
||||||
|
assert.Equal(t, "My Spec", result["title"])
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBrainClassify_StripsFences(t *testing.T) {
|
||||||
|
llmResp := "```json\n{\"type\":\"note\",\"title\":\"T\",\"tags\":[]}\n```"
|
||||||
|
srv := mcp.NewServer(t.TempDir(), nil, nil, mockAnswerLLM(llmResp))
|
||||||
|
ts := httptest.NewServer(srv)
|
||||||
|
defer ts.Close()
|
||||||
|
|
||||||
|
rpc := callTool(t, ts, "brain_classify", map[string]any{"text": "some text"})
|
||||||
|
require.Nil(t, rpc["error"])
|
||||||
|
|
||||||
|
content := rpc["result"].(map[string]any)["content"].([]any)[0].(map[string]any)["text"].(string)
|
||||||
|
var result map[string]any
|
||||||
|
require.NoError(t, json.Unmarshal([]byte(content), &result))
|
||||||
|
assert.Equal(t, "note", result["type"])
|
||||||
|
}
|
||||||
38
ingestion/internal/oauth/metadata.go
Normal file
38
ingestion/internal/oauth/metadata.go
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
// Package oauth implements a minimal OAuth 2.0 client_credentials flow
|
||||||
|
// for the brain MCP server. Designed for claude.ai's custom MCP integration
|
||||||
|
// UI, which only supports OAuth (no static-Bearer field). The flow trades
|
||||||
|
// a registered client_id + client_secret for the existing BRAIN_MCP_TOKEN —
|
||||||
|
// no JWTs, no expiry, no refresh — so the rest of the auth middleware is
|
||||||
|
// unchanged.
|
||||||
|
package oauth
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"net/http"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// MetadataHandler serves RFC 8414 authorization-server metadata at
|
||||||
|
// GET /.well-known/oauth-authorization-server. issuer must be the public
|
||||||
|
// origin of the brain MCP (e.g. https://brain-mcp.d-ma.be); the handler
|
||||||
|
// derives the token endpoint from it.
|
||||||
|
//
|
||||||
|
// Mount with no auth — discovery must be reachable to anonymous callers.
|
||||||
|
func MetadataHandler(issuer string) http.HandlerFunc {
|
||||||
|
issuer = strings.TrimRight(issuer, "/")
|
||||||
|
body, _ := json.Marshal(struct {
|
||||||
|
Issuer string `json:"issuer"`
|
||||||
|
TokenEndpoint string `json:"token_endpoint"`
|
||||||
|
GrantTypes []string `json:"grant_types_supported"`
|
||||||
|
TokenEndpointAuthMeth []string `json:"token_endpoint_auth_methods_supported"`
|
||||||
|
}{
|
||||||
|
Issuer: issuer,
|
||||||
|
TokenEndpoint: issuer + "/oauth/token",
|
||||||
|
GrantTypes: []string{"client_credentials"},
|
||||||
|
TokenEndpointAuthMeth: []string{"client_secret_post", "client_secret_basic"},
|
||||||
|
})
|
||||||
|
return func(w http.ResponseWriter, _ *http.Request) {
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
_, _ = w.Write(body)
|
||||||
|
}
|
||||||
|
}
|
||||||
41
ingestion/internal/oauth/metadata_test.go
Normal file
41
ingestion/internal/oauth/metadata_test.go
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
package oauth_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/oauth"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestMetadataHandler_ReturnsJSON(t *testing.T) {
|
||||||
|
h := oauth.MetadataHandler("https://brain-mcp.d-ma.be")
|
||||||
|
req := httptest.NewRequest(http.MethodGet, "/.well-known/oauth-authorization-server", nil)
|
||||||
|
rr := httptest.NewRecorder()
|
||||||
|
h.ServeHTTP(rr, req)
|
||||||
|
|
||||||
|
assert.Equal(t, http.StatusOK, rr.Code)
|
||||||
|
assert.Equal(t, "application/json", rr.Header().Get("Content-Type"))
|
||||||
|
|
||||||
|
var body map[string]any
|
||||||
|
require.NoError(t, json.Unmarshal(rr.Body.Bytes(), &body))
|
||||||
|
assert.Equal(t, "https://brain-mcp.d-ma.be", body["issuer"])
|
||||||
|
assert.Equal(t, "https://brain-mcp.d-ma.be/oauth/token", body["token_endpoint"])
|
||||||
|
assert.ElementsMatch(t, []any{"client_credentials"}, body["grant_types_supported"])
|
||||||
|
assert.ElementsMatch(t,
|
||||||
|
[]any{"client_secret_post", "client_secret_basic"},
|
||||||
|
body["token_endpoint_auth_methods_supported"])
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMetadataHandler_StripsTrailingSlashFromIssuer(t *testing.T) {
|
||||||
|
h := oauth.MetadataHandler("https://brain-mcp.d-ma.be/")
|
||||||
|
rr := httptest.NewRecorder()
|
||||||
|
h.ServeHTTP(rr, httptest.NewRequest(http.MethodGet, "/.well-known/oauth-authorization-server", nil))
|
||||||
|
var body map[string]any
|
||||||
|
require.NoError(t, json.Unmarshal(rr.Body.Bytes(), &body))
|
||||||
|
assert.Equal(t, "https://brain-mcp.d-ma.be", body["issuer"])
|
||||||
|
assert.Equal(t, "https://brain-mcp.d-ma.be/oauth/token", body["token_endpoint"])
|
||||||
|
}
|
||||||
87
ingestion/internal/oauth/token.go
Normal file
87
ingestion/internal/oauth/token.go
Normal file
@@ -0,0 +1,87 @@
|
|||||||
|
package oauth
|
||||||
|
|
||||||
|
import (
|
||||||
|
"crypto/subtle"
|
||||||
|
"encoding/json"
|
||||||
|
"net/http"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TokenConfig holds the static settings of the token endpoint. Every
// field must be set.
type TokenConfig struct {
	// ClientID and ClientSecret form the one credential pair the endpoint
	// accepts. claude.ai's custom-MCP UI stores them on its side.
	ClientID, ClientSecret string

	// AccessToken is the bearer value returned after a successful
	// exchange. In this deployment it is BRAIN_MCP_TOKEN — the same static
	// token the rest of the auth middleware already accepts — so nothing
	// downstream needs JWT handling.
	AccessToken string
}
|
||||||
|
|
||||||
|
// TokenHandler serves POST /oauth/token. Implements the
|
||||||
|
// client_credentials grant only, with client_secret_post and
|
||||||
|
// client_secret_basic auth methods (both advertised by MetadataHandler).
|
||||||
|
// Errors follow RFC 6749 §5.2 — JSON body with an "error" field.
|
||||||
|
//
|
||||||
|
// Mount with no auth — credentials live in the request body / header.
|
||||||
|
func TokenHandler(cfg TokenConfig) http.HandlerFunc {
|
||||||
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if r.Method != http.MethodPost {
|
||||||
|
w.Header().Set("Allow", http.MethodPost)
|
||||||
|
writeOAuthError(w, http.StatusMethodNotAllowed, "invalid_request", "POST required")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if err := r.ParseForm(); err != nil {
|
||||||
|
writeOAuthError(w, http.StatusBadRequest, "invalid_request", "form parse")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if r.PostForm.Get("grant_type") != "client_credentials" {
|
||||||
|
writeOAuthError(w, http.StatusBadRequest, "unsupported_grant_type",
|
||||||
|
"only client_credentials is supported")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
clientID, clientSecret := extractClientCreds(r)
|
||||||
|
if !constantTimeEqual(clientID, cfg.ClientID) ||
|
||||||
|
!constantTimeEqual(clientSecret, cfg.ClientSecret) {
|
||||||
|
writeOAuthError(w, http.StatusUnauthorized, "invalid_client", "bad credentials")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
w.Header().Set("Cache-Control", "no-store")
|
||||||
|
_ = json.NewEncoder(w).Encode(struct {
|
||||||
|
AccessToken string `json:"access_token"`
|
||||||
|
TokenType string `json:"token_type"`
|
||||||
|
}{cfg.AccessToken, "bearer"})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// extractClientCreds returns the client_id and client_secret pair from
|
||||||
|
// either client_secret_basic (HTTP Basic) or client_secret_post (form
|
||||||
|
// fields). When both are present, Basic wins per RFC 6749 §2.3.1.
|
||||||
|
func extractClientCreds(r *http.Request) (string, string) {
|
||||||
|
if id, secret, ok := r.BasicAuth(); ok {
|
||||||
|
return id, secret
|
||||||
|
}
|
||||||
|
return r.PostForm.Get("client_id"), r.PostForm.Get("client_secret")
|
||||||
|
}
|
||||||
|
|
||||||
|
// constantTimeEqual reports whether a and b are equal, comparing their
// bytes with subtle.ConstantTimeCompare. An empty string on either side
// never matches, so absent credentials cannot equal an unset config value.
func constantTimeEqual(a, b string) bool {
	switch {
	case len(a) == 0, len(b) == 0:
		return false
	default:
		return subtle.ConstantTimeCompare([]byte(a), []byte(b)) == 1
	}
}
|
||||||
|
|
||||||
|
func writeOAuthError(w http.ResponseWriter, status int, code, desc string) {
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
w.Header().Set("Cache-Control", "no-store")
|
||||||
|
w.WriteHeader(status)
|
||||||
|
_ = json.NewEncoder(w).Encode(struct {
|
||||||
|
Error string `json:"error"`
|
||||||
|
ErrorDescription string `json:"error_description,omitempty"`
|
||||||
|
}{code, desc})
|
||||||
|
}
|
||||||
134
ingestion/internal/oauth/token_test.go
Normal file
134
ingestion/internal/oauth/token_test.go
Normal file
@@ -0,0 +1,134 @@
|
|||||||
|
package oauth_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/base64"
|
||||||
|
"encoding/json"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"net/url"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/oauth"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
func newTokenServer() *httptest.Server {
|
||||||
|
return httptest.NewServer(oauth.TokenHandler(oauth.TokenConfig{
|
||||||
|
ClientID: "the-client",
|
||||||
|
ClientSecret: "the-secret",
|
||||||
|
AccessToken: "BRAIN_TOKEN_VALUE",
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
|
||||||
|
func postForm(t *testing.T, srv *httptest.Server, vals url.Values, basic [2]string) *http.Response {
|
||||||
|
t.Helper()
|
||||||
|
req, err := http.NewRequest(http.MethodPost, srv.URL+"/oauth/token", strings.NewReader(vals.Encode()))
|
||||||
|
require.NoError(t, err)
|
||||||
|
req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
|
||||||
|
if basic[0] != "" {
|
||||||
|
req.SetBasicAuth(basic[0], basic[1])
|
||||||
|
}
|
||||||
|
resp, err := http.DefaultClient.Do(req)
|
||||||
|
require.NoError(t, err)
|
||||||
|
return resp
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestTokenHandler_ClientSecretPost_Success(t *testing.T) {
|
||||||
|
srv := newTokenServer()
|
||||||
|
defer srv.Close()
|
||||||
|
resp := postForm(t, srv, url.Values{
|
||||||
|
"grant_type": {"client_credentials"},
|
||||||
|
"client_id": {"the-client"},
|
||||||
|
"client_secret": {"the-secret"},
|
||||||
|
}, [2]string{})
|
||||||
|
defer func() { _ = resp.Body.Close() }()
|
||||||
|
assert.Equal(t, http.StatusOK, resp.StatusCode)
|
||||||
|
assert.Equal(t, "application/json", resp.Header.Get("Content-Type"))
|
||||||
|
|
||||||
|
var body map[string]any
|
||||||
|
require.NoError(t, json.NewDecoder(resp.Body).Decode(&body))
|
||||||
|
assert.Equal(t, "BRAIN_TOKEN_VALUE", body["access_token"])
|
||||||
|
assert.Equal(t, "bearer", body["token_type"])
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestTokenHandler_ClientSecretBasic_Success(t *testing.T) {
|
||||||
|
srv := newTokenServer()
|
||||||
|
defer srv.Close()
|
||||||
|
resp := postForm(t, srv,
|
||||||
|
url.Values{"grant_type": {"client_credentials"}},
|
||||||
|
[2]string{"the-client", "the-secret"},
|
||||||
|
)
|
||||||
|
defer func() { _ = resp.Body.Close() }()
|
||||||
|
assert.Equal(t, http.StatusOK, resp.StatusCode)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestTokenHandler_WrongSecret(t *testing.T) {
|
||||||
|
srv := newTokenServer()
|
||||||
|
defer srv.Close()
|
||||||
|
resp := postForm(t, srv, url.Values{
|
||||||
|
"grant_type": {"client_credentials"},
|
||||||
|
"client_id": {"the-client"},
|
||||||
|
"client_secret": {"wrong"},
|
||||||
|
}, [2]string{})
|
||||||
|
defer func() { _ = resp.Body.Close() }()
|
||||||
|
assert.Equal(t, http.StatusUnauthorized, resp.StatusCode)
|
||||||
|
var body map[string]any
|
||||||
|
require.NoError(t, json.NewDecoder(resp.Body).Decode(&body))
|
||||||
|
assert.Equal(t, "invalid_client", body["error"])
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestTokenHandler_BadGrantType(t *testing.T) {
|
||||||
|
srv := newTokenServer()
|
||||||
|
defer srv.Close()
|
||||||
|
resp := postForm(t, srv, url.Values{
|
||||||
|
"grant_type": {"password"},
|
||||||
|
"client_id": {"the-client"},
|
||||||
|
"client_secret": {"the-secret"},
|
||||||
|
}, [2]string{})
|
||||||
|
defer func() { _ = resp.Body.Close() }()
|
||||||
|
assert.Equal(t, http.StatusBadRequest, resp.StatusCode)
|
||||||
|
var body map[string]any
|
||||||
|
require.NoError(t, json.NewDecoder(resp.Body).Decode(&body))
|
||||||
|
assert.Equal(t, "unsupported_grant_type", body["error"])
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestTokenHandler_RejectsGet(t *testing.T) {
|
||||||
|
srv := newTokenServer()
|
||||||
|
defer srv.Close()
|
||||||
|
resp, err := http.Get(srv.URL + "/oauth/token")
|
||||||
|
require.NoError(t, err)
|
||||||
|
defer func() { _ = resp.Body.Close() }()
|
||||||
|
assert.Equal(t, http.StatusMethodNotAllowed, resp.StatusCode)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestTokenHandler_BasicMalformed_FallsThrough(t *testing.T) {
|
||||||
|
srv := newTokenServer()
|
||||||
|
defer srv.Close()
|
||||||
|
// Malformed (non-base64) Authorization header — handler should treat
|
||||||
|
// the request as missing creds, not crash.
|
||||||
|
req, _ := http.NewRequest(http.MethodPost, srv.URL+"/oauth/token",
|
||||||
|
strings.NewReader("grant_type=client_credentials"))
|
||||||
|
req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
|
||||||
|
req.Header.Set("Authorization", "Basic ###not-base64###")
|
||||||
|
resp, err := http.DefaultClient.Do(req)
|
||||||
|
require.NoError(t, err)
|
||||||
|
defer func() { _ = resp.Body.Close() }()
|
||||||
|
assert.Equal(t, http.StatusUnauthorized, resp.StatusCode)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestTokenHandler_BasicNoColon(t *testing.T) {
|
||||||
|
srv := newTokenServer()
|
||||||
|
defer srv.Close()
|
||||||
|
// "client-only" base64 — missing the `:secret` half.
|
||||||
|
enc := base64.StdEncoding.EncodeToString([]byte("the-client"))
|
||||||
|
req, _ := http.NewRequest(http.MethodPost, srv.URL+"/oauth/token",
|
||||||
|
strings.NewReader("grant_type=client_credentials"))
|
||||||
|
req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
|
||||||
|
req.Header.Set("Authorization", "Basic "+enc)
|
||||||
|
resp, err := http.DefaultClient.Do(req)
|
||||||
|
require.NoError(t, err)
|
||||||
|
defer func() { _ = resp.Body.Close() }()
|
||||||
|
assert.Equal(t, http.StatusUnauthorized, resp.StatusCode)
|
||||||
|
}
|
||||||
119
ingestion/internal/reranker/reranker.go
Normal file
119
ingestion/internal/reranker/reranker.go
Normal file
@@ -0,0 +1,119 @@
|
|||||||
|
// Package reranker scores (query, document) pairs against a cross-encoder
|
||||||
|
// served by an Ollama-compatible backend.
|
||||||
|
//
|
||||||
|
// Wire format is Ollama's `/api/generate`. The model is prompted with the
|
||||||
|
// Qwen3-Reranker yes/no template — the canonical interface the model
|
||||||
|
// itself was trained against — and the first token of the response is
|
||||||
|
// treated as a binary relevance vote: "yes" → 1.0, anything else → 0.0.
|
||||||
|
// Ties are expected to be broken by the caller's primary retrieval score
|
||||||
|
// (e.g. BM25), so the binary signal is a filter rather than a ranking
|
||||||
|
// substitute.
|
||||||
|
package reranker
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Client sends rerank requests to an Ollama-compatible endpoint.
type Client struct {
	// URL is the backend base address; "/api/generate" is appended per request.
	URL string
	// Model is sent as the "model" field of every request.
	Model string
	// HTTP performs the requests; New installs a client with a 30s timeout.
	HTTP *http.Client
}
|
||||||
|
|
||||||
|
// New constructs a Client. Returns nil when url is empty so callers can
|
||||||
|
// treat a missing BRAIN_RERANKER_URL as "feature disabled" with a single
|
||||||
|
// nil check.
|
||||||
|
func New(url, model string) *Client {
|
||||||
|
if url == "" {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return &Client{
|
||||||
|
URL: strings.TrimRight(url, "/"),
|
||||||
|
Model: model,
|
||||||
|
HTTP: &http.Client{Timeout: 30 * time.Second},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Score returns one [0, 1] relevance score per input document, parallel
|
||||||
|
// to the input order. Each (query, doc) pair is scored independently —
|
||||||
|
// Qwen3-Reranker is a cross-encoder and expects per-pair calls.
|
||||||
|
func (c *Client) Score(ctx context.Context, query string, docs []string) ([]float64, error) {
|
||||||
|
out := make([]float64, len(docs))
|
||||||
|
for i, doc := range docs {
|
||||||
|
s, err := c.scoreOne(ctx, query, doc)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("rerank doc %d: %w", i, err)
|
||||||
|
}
|
||||||
|
out[i] = s
|
||||||
|
}
|
||||||
|
return out, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *Client) scoreOne(ctx context.Context, query, doc string) (float64, error) {
|
||||||
|
prompt := buildPrompt(query, doc)
|
||||||
|
reqBody, _ := json.Marshal(map[string]any{
|
||||||
|
"model": c.Model,
|
||||||
|
"prompt": prompt,
|
||||||
|
"stream": false,
|
||||||
|
"options": map[string]any{
|
||||||
|
"num_predict": 4,
|
||||||
|
"temperature": 0,
|
||||||
|
},
|
||||||
|
})
|
||||||
|
req, err := http.NewRequestWithContext(ctx, http.MethodPost,
|
||||||
|
c.URL+"/api/generate", bytes.NewReader(reqBody))
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
req.Header.Set("Content-Type", "application/json")
|
||||||
|
resp, err := c.HTTP.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
defer func() { _ = resp.Body.Close() }()
|
||||||
|
if resp.StatusCode/100 != 2 {
|
||||||
|
body, _ := io.ReadAll(resp.Body)
|
||||||
|
return 0, fmt.Errorf("status %d: %s", resp.StatusCode, string(body))
|
||||||
|
}
|
||||||
|
var out struct {
|
||||||
|
Response string `json:"response"`
|
||||||
|
}
|
||||||
|
if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
return parseYesNo(out.Response), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// buildPrompt assembles the Qwen3-Reranker chat template. Kept verbatim
|
||||||
|
// because the model was trained on this exact wording.
|
||||||
|
func buildPrompt(query, doc string) string {
|
||||||
|
return "<|im_start|>system\nJudge whether the Document meets the requirements based on the Query and the Instruct provided. Note that the answer can only be \"yes\" or \"no\".<|im_end|>\n" +
|
||||||
|
"<|im_start|>user\n<Instruct>: Given a web search query, retrieve relevant passages that answer the query\n" +
|
||||||
|
"<Query>: " + query + "\n" +
|
||||||
|
"<Document>: " + doc + "<|im_end|>\n" +
|
||||||
|
"<|im_start|>assistant\n<think>\n\n</think>\n\n"
|
||||||
|
}
|
||||||
|
|
||||||
|
// parseYesNo turns the model's reply into a binary relevance vote: 1.0 when
// the first meaningful text starts with "yes" (case-insensitive), 0.0
// otherwise.
//
// Before the check it discards, in order: surrounding whitespace, everything
// up to and including the first `</think>` tag (the model may emit a think
// block even when thinking is empty), and any leading ASCII whitespace,
// quotes, markdown emphasis or punctuation, so replies such as `"yes"`,
// `**Yes**` or `- yes.` count as a yes vote.
func parseYesNo(s string) float64 {
	// Characters that may precede the answer without changing its meaning.
	const leadingNoise = " \t\r\n\"'`*_.,:;!?-()[]{}"

	s = strings.TrimSpace(s)
	if idx := strings.Index(s, "</think>"); idx != -1 {
		s = strings.TrimSpace(s[idx+len("</think>"):])
	}
	s = strings.TrimLeft(s, leadingNoise)
	if strings.HasPrefix(strings.ToLower(s), "yes") {
		return 1.0
	}
	return 0.0
}
|
||||||
119
ingestion/internal/reranker/reranker_test.go
Normal file
119
ingestion/internal/reranker/reranker_test.go
Normal file
@@ -0,0 +1,119 @@
|
|||||||
|
package reranker_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/reranker"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
// fakeOllama is a stand-in for the /api/generate endpoint. The reply is picked
// from a needle → answer table: when the incoming prompt contains a needle,
// the mapped answer is returned; otherwise the reply is "no".
type fakeOllama struct {
	t *testing.T

	// answers maps a substring of the prompt to the reply ("yes" or "no").
	answers map[string]string

	// calls counts the requests the handler has decoded so far.
	calls int
	// lastBody holds the decoded JSON body of the most recent request.
	lastBody map[string]any
}
|
||||||
|
|
||||||
|
func (f *fakeOllama) handler() http.Handler {
|
||||||
|
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
require.Equal(f.t, http.MethodPost, r.Method)
|
||||||
|
require.Equal(f.t, "/api/generate", r.URL.Path)
|
||||||
|
body, err := io.ReadAll(r.Body)
|
||||||
|
require.NoError(f.t, err)
|
||||||
|
var p map[string]any
|
||||||
|
require.NoError(f.t, json.Unmarshal(body, &p))
|
||||||
|
f.calls++
|
||||||
|
f.lastBody = p
|
||||||
|
prompt := p["prompt"].(string)
|
||||||
|
answer := "no"
|
||||||
|
for needle, a := range f.answers {
|
||||||
|
if strings.Contains(prompt, needle) {
|
||||||
|
answer = a
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||||
|
"model": p["model"], "response": answer, "done": true,
|
||||||
|
})
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNew_EmptyURLReturnsNil(t *testing.T) {
|
||||||
|
assert.Nil(t, reranker.New("", "model"))
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestScore_YesAndNoOrdered(t *testing.T) {
|
||||||
|
f := &fakeOllama{t: t, answers: map[string]string{
|
||||||
|
"alpha doc": "yes",
|
||||||
|
"beta doc": "no",
|
||||||
|
"gamma doc": "yes",
|
||||||
|
}}
|
||||||
|
srv := httptest.NewServer(f.handler())
|
||||||
|
defer srv.Close()
|
||||||
|
c := reranker.New(srv.URL, "test-model")
|
||||||
|
require.NotNil(t, c)
|
||||||
|
|
||||||
|
scores, err := c.Score(context.Background(), "what is alpha",
|
||||||
|
[]string{"alpha doc body", "beta doc body", "gamma doc body"})
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.Len(t, scores, 3)
|
||||||
|
assert.Equal(t, 1.0, scores[0])
|
||||||
|
assert.Equal(t, 0.0, scores[1])
|
||||||
|
assert.Equal(t, 1.0, scores[2])
|
||||||
|
assert.Equal(t, 3, f.calls)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestScore_SendsCorrectShape(t *testing.T) {
|
||||||
|
f := &fakeOllama{t: t, answers: map[string]string{"hello": "yes"}}
|
||||||
|
srv := httptest.NewServer(f.handler())
|
||||||
|
defer srv.Close()
|
||||||
|
c := reranker.New(srv.URL, "qwen3-rerank")
|
||||||
|
_, err := c.Score(context.Background(), "greeting", []string{"hello world"})
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, "qwen3-rerank", f.lastBody["model"])
|
||||||
|
prompt := f.lastBody["prompt"].(string)
|
||||||
|
assert.Contains(t, prompt, "greeting")
|
||||||
|
assert.Contains(t, prompt, "hello world")
|
||||||
|
assert.Contains(t, prompt, `"yes" or "no"`)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestScore_HandlesAmbiguousResponse(t *testing.T) {
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
|
||||||
|
_ = json.NewEncoder(w).Encode(map[string]any{"response": "maybe — unclear", "done": true})
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
c := reranker.New(srv.URL, "m")
|
||||||
|
scores, err := c.Score(context.Background(), "q", []string{"d"})
|
||||||
|
require.NoError(t, err)
|
||||||
|
// Anything that does not start with "yes" (case-insensitive, after
|
||||||
|
// whitespace/think trim) is treated as "no" = 0.
|
||||||
|
assert.Equal(t, []float64{0}, scores)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestScore_EmptyDocsReturnsEmpty(t *testing.T) {
|
||||||
|
c := reranker.New("http://127.0.0.1:1", "m")
|
||||||
|
scores, err := c.Score(context.Background(), "q", nil)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Empty(t, scores)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestScore_UpstreamErrorPropagates(t *testing.T) {
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
|
||||||
|
w.WriteHeader(http.StatusInternalServerError)
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
c := reranker.New(srv.URL, "m")
|
||||||
|
_, err := c.Score(context.Background(), "q", []string{"d"})
|
||||||
|
require.Error(t, err)
|
||||||
|
}
|
||||||
@@ -3,38 +3,93 @@ package search
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"bufio"
|
"bufio"
|
||||||
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"sort"
|
"sort"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/brain"
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/vectorstore"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// VectorSearcher returns the top-limit nearest paths by cosine
|
||||||
|
// distance. The vectorstore package implements this against pgvector.
|
||||||
|
type VectorSearcher interface {
|
||||||
|
Search(ctx context.Context, query []float32, limit int) ([]VectorHit, error)
|
||||||
|
}
|
||||||
|
|
||||||
|
// VectorHit pairs a stored path with its distance from the query vector.
// It is declared in this package so that VectorSearcher can be stubbed in
// tests with plain values.
type VectorHit struct {
	// Path is the store key of the hit; hybridMerge maps it back to its
	// parent note via vectorstore.ParentPath.
	Path string
	// Distance is the value reported by the searcher for this hit.
	Distance float64
}
|
||||||
|
|
||||||
|
// Embedder converts a piece of text into a dense vector. The embed package
// provides the implementation backed by Ollama's /api/embed.
type Embedder interface {
	Embed(ctx context.Context, text string) ([]float32, error)
}
|
||||||
|
|
||||||
// Result is a single search hit from the brain wiki.
|
// Result is a single search hit from the brain wiki.
|
||||||
type Result struct {
|
type Result struct {
|
||||||
Path string `json:"path"`
|
Path string `json:"path"`
|
||||||
Title string `json:"title"`
|
Title string `json:"title"`
|
||||||
Excerpt string `json:"excerpt"`
|
Excerpt string `json:"excerpt"`
|
||||||
Score int `json:"score"`
|
Score int `json:"score"`
|
||||||
|
Wing string `json:"wing,omitempty"`
|
||||||
|
Hall string `json:"hall,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// Query searches all .md files under brainDir/wiki/ for pages containing
|
// QueryOptions configures a search.
|
||||||
// any of the whitespace-separated terms in query. Returns up to limit results
|
//
|
||||||
// sorted by score descending.
|
// When Wing is set, the walk is restricted to brain/wiki/<wing>/.
|
||||||
func Query(brainDir, query string, limit int) ([]Result, error) {
|
// When Hall is additionally set, the walk is restricted to
|
||||||
if limit <= 0 {
|
// brain/wiki/<wing>/<hall>/. Without either, the legacy walk over
|
||||||
limit = 5
|
// brain/knowledge/ and brain/wiki/ is used.
|
||||||
|
//
|
||||||
|
// When both Vector and Embedder are non-nil, results are computed
|
||||||
|
// hybridly: BM25 and vector candidate lists are merged via Reciprocal
|
||||||
|
// Rank Fusion. With either nil the function falls back to BM25 only,
|
||||||
|
// keeping behaviour unchanged for callers that have not opted in.
|
||||||
|
type QueryOptions struct {
|
||||||
|
Query string
|
||||||
|
Limit int
|
||||||
|
Wing string
|
||||||
|
Hall string
|
||||||
|
Vector VectorSearcher
|
||||||
|
Embedder Embedder
|
||||||
|
}
|
||||||
|
|
||||||
|
// Query searches the brain. Returns up to opts.Limit results sorted by
|
||||||
|
// score descending. Empty query returns nil.
|
||||||
|
func Query(brainDir string, opts QueryOptions) ([]Result, error) {
|
||||||
|
return QueryContext(context.Background(), brainDir, opts)
|
||||||
|
}
|
||||||
|
|
||||||
|
// QueryContext is the cancellable variant of Query. Hybrid retrieval
|
||||||
|
// requires a context because both the embedder and the vector store are
|
||||||
|
// network calls.
|
||||||
|
func QueryContext(ctx context.Context, brainDir string, opts QueryOptions) ([]Result, error) {
|
||||||
|
if opts.Limit <= 0 {
|
||||||
|
opts.Limit = 5
|
||||||
}
|
}
|
||||||
terms := strings.Fields(strings.ToLower(query))
|
terms := strings.Fields(strings.ToLower(opts.Query))
|
||||||
if len(terms) == 0 {
|
if len(terms) == 0 {
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
var results []Result
|
roots, err := resolveRoots(brainDir, opts.Wing, opts.Hall)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
for _, subdir := range []string{"knowledge", "wiki"} {
|
var results []Result
|
||||||
dir := filepath.Join(brainDir, subdir)
|
for _, dir := range roots {
|
||||||
if _, statErr := os.Stat(dir); os.IsNotExist(statErr) {
|
if _, statErr := os.Stat(dir); os.IsNotExist(statErr) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
@@ -46,13 +101,11 @@ func Query(brainDir, query string, limit int) ([]Result, error) {
|
|||||||
if d.IsDir() || !strings.HasSuffix(path, ".md") {
|
if d.IsDir() || !strings.HasSuffix(path, ".md") {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
content, err := os.ReadFile(path)
|
content, err := os.ReadFile(path)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
slog.Warn("search: skipping unreadable file", "path", path, "err", err)
|
slog.Warn("search: skipping unreadable file", "path", path, "err", err)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
lower := strings.ToLower(string(content))
|
lower := strings.ToLower(string(content))
|
||||||
score := 0
|
score := 0
|
||||||
for _, term := range terms {
|
for _, term := range terms {
|
||||||
@@ -61,18 +114,19 @@ func Query(brainDir, query string, limit int) ([]Result, error) {
|
|||||||
if score == 0 {
|
if score == 0 {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
rel, err := filepath.Rel(brainDir, path)
|
rel, err := filepath.Rel(brainDir, path)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("rel path: %w", err)
|
return fmt.Errorf("rel path: %w", err)
|
||||||
}
|
}
|
||||||
rel = filepath.ToSlash(rel)
|
rel = filepath.ToSlash(rel)
|
||||||
|
wing, hall := extractWingHall(string(content), rel)
|
||||||
results = append(results, Result{
|
results = append(results, Result{
|
||||||
Path: rel,
|
Path: rel,
|
||||||
Title: extractTitle(string(content), d.Name()),
|
Title: extractTitle(string(content), d.Name()),
|
||||||
Excerpt: excerpt(string(content), 300),
|
Excerpt: excerpt(string(content), 300),
|
||||||
Score: score,
|
Score: score,
|
||||||
|
Wing: wing,
|
||||||
|
Hall: hall,
|
||||||
})
|
})
|
||||||
return nil
|
return nil
|
||||||
})
|
})
|
||||||
@@ -84,12 +138,181 @@ func Query(brainDir, query string, limit int) ([]Result, error) {
|
|||||||
sort.Slice(results, func(i, j int) bool {
|
sort.Slice(results, func(i, j int) bool {
|
||||||
return results[i].Score > results[j].Score
|
return results[i].Score > results[j].Score
|
||||||
})
|
})
|
||||||
if len(results) > limit {
|
|
||||||
results = results[:limit]
|
// Hybrid scoring kicks in only when both the embedder and the
|
||||||
|
// vector store are wired and BM25 actually returned candidates.
|
||||||
|
if opts.Vector != nil && opts.Embedder != nil && len(results) > 0 {
|
||||||
|
merged, err := hybridMerge(ctx, brainDir, opts, results)
|
||||||
|
if err != nil {
|
||||||
|
slog.Warn("search: hybrid merge failed, falling back to BM25", "err", err)
|
||||||
|
} else {
|
||||||
|
results = merged
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(results) > opts.Limit {
|
||||||
|
results = results[:opts.Limit]
|
||||||
}
|
}
|
||||||
return results, nil
|
return results, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// rrfK is the constant in the Reciprocal Rank Fusion formula. 60 is
|
||||||
|
// standard (Cormack et al. 2009) and parameter-free in practice.
|
||||||
|
const rrfK = 60.0
|
||||||
|
|
||||||
|
// hybridMerge embeds the query, runs a vector search, and merges its
|
||||||
|
// candidates with the BM25 list via Reciprocal Rank Fusion. Results
|
||||||
|
// that came only from the vector side are hydrated by reading the
|
||||||
|
// note's frontmatter for title/wing/hall and excerpting the body.
|
||||||
|
//
|
||||||
|
// rrf(d) = sum_r 1 / (k + rank_r(d)) over rankers r ∈ {BM25, vector}.
|
||||||
|
func hybridMerge(ctx context.Context, brainDir string, opts QueryOptions, bm25 []Result) ([]Result, error) {
|
||||||
|
q, err := opts.Embedder.Embed(ctx, opts.Query)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("embed query: %w", err)
|
||||||
|
}
|
||||||
|
vectorLimit := opts.Limit * 4
|
||||||
|
if vectorLimit < 20 {
|
||||||
|
vectorLimit = 20
|
||||||
|
}
|
||||||
|
hits, err := opts.Vector.Search(ctx, q, vectorLimit)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("vector search: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
rrf := make(map[string]float64)
|
||||||
|
byPath := make(map[string]Result)
|
||||||
|
for rank, r := range bm25 {
|
||||||
|
rrf[r.Path] += 1.0 / (rrfK + float64(rank+1))
|
||||||
|
byPath[r.Path] = r
|
||||||
|
}
|
||||||
|
for rank, h := range hits {
|
||||||
|
// Vector store keys are chunk paths ("wiki/foo.md#0001"); collapse
|
||||||
|
// back to the parent so multiple chunk hits from the same file
|
||||||
|
// score against a single result row.
|
||||||
|
parent := vectorstore.ParentPath(h.Path)
|
||||||
|
if opts.Wing != "" && !pathInScope(parent, opts.Wing, opts.Hall) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
rrf[parent] += 1.0 / (rrfK + float64(rank+1))
|
||||||
|
if _, seen := byPath[parent]; !seen {
|
||||||
|
r, err := hydrate(brainDir, parent)
|
||||||
|
if err != nil {
|
||||||
|
slog.Warn("search: hydrate failed for vector hit", "path", parent, "err", err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
byPath[parent] = r
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
merged := make([]Result, 0, len(byPath))
|
||||||
|
for p, r := range byPath {
|
||||||
|
r.Score = int(rrf[p] * 1e6) // scale to int for stable JSON; relative order is what matters
|
||||||
|
merged = append(merged, r)
|
||||||
|
}
|
||||||
|
sort.Slice(merged, func(i, j int) bool {
|
||||||
|
return merged[i].Score > merged[j].Score
|
||||||
|
})
|
||||||
|
return merged, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// pathInScope reports whether a wiki path satisfies the wing/hall filter.
|
||||||
|
func pathInScope(relPath, wing, hall string) bool {
|
||||||
|
prefix := "wiki/" + brain.Sanitise(wing) + "/"
|
||||||
|
if hall != "" {
|
||||||
|
prefix += hall + "/"
|
||||||
|
}
|
||||||
|
return strings.HasPrefix(relPath, prefix)
|
||||||
|
}
|
||||||
|
|
||||||
|
// hydrate reads a single note from disk and returns a Result with title,
|
||||||
|
// excerpt, wing, and hall populated. Used for paths that surface only
|
||||||
|
// via vector search.
|
||||||
|
func hydrate(brainDir, relPath string) (Result, error) {
|
||||||
|
full := filepath.Join(brainDir, filepath.FromSlash(relPath))
|
||||||
|
content, err := os.ReadFile(full)
|
||||||
|
if err != nil {
|
||||||
|
return Result{}, err
|
||||||
|
}
|
||||||
|
wing, hall := extractWingHall(string(content), relPath)
|
||||||
|
return Result{
|
||||||
|
Path: relPath,
|
||||||
|
Title: extractTitle(string(content), filepath.Base(relPath)),
|
||||||
|
Excerpt: excerpt(string(content), 300),
|
||||||
|
Wing: wing,
|
||||||
|
Hall: hall,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// resolveRoots returns the directories to walk for the given wing/hall
|
||||||
|
// filters. Validates hall against the closed vocabulary when set.
|
||||||
|
func resolveRoots(brainDir, wing, hall string) ([]string, error) {
|
||||||
|
if hall != "" && !brain.IsValidHall(hall) {
|
||||||
|
return nil, fmt.Errorf("invalid hall %q", hall)
|
||||||
|
}
|
||||||
|
if wing != "" {
|
||||||
|
w := brain.Sanitise(wing)
|
||||||
|
if w == "" {
|
||||||
|
return nil, fmt.Errorf("invalid wing %q", wing)
|
||||||
|
}
|
||||||
|
if hall != "" {
|
||||||
|
return []string{filepath.Join(brainDir, "wiki", w, hall)}, nil
|
||||||
|
}
|
||||||
|
return []string{filepath.Join(brainDir, "wiki", w)}, nil
|
||||||
|
}
|
||||||
|
if hall != "" {
|
||||||
|
return nil, fmt.Errorf("hall filter requires wing")
|
||||||
|
}
|
||||||
|
return []string{
|
||||||
|
filepath.Join(brainDir, "knowledge"),
|
||||||
|
filepath.Join(brainDir, "wiki"),
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// extractWingHall reads wing/hall from frontmatter first, falling back to
|
||||||
|
// path segments brain/wiki/<wing>/<hall>/.
|
||||||
|
func extractWingHall(content, relPath string) (wing, hall string) {
|
||||||
|
scanner := bufio.NewScanner(strings.NewReader(content))
|
||||||
|
inFrontmatter := false
|
||||||
|
for scanner.Scan() {
|
||||||
|
line := scanner.Text()
|
||||||
|
if strings.TrimSpace(line) == "---" {
|
||||||
|
if !inFrontmatter {
|
||||||
|
inFrontmatter = true
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if !inFrontmatter {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
key, val, ok := strings.Cut(line, ":")
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
v := strings.Trim(strings.TrimSpace(val), `"'`)
|
||||||
|
switch strings.TrimSpace(key) {
|
||||||
|
case "wing":
|
||||||
|
wing = v
|
||||||
|
case "hall":
|
||||||
|
hall = v
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if wing != "" && hall != "" {
|
||||||
|
return wing, hall
|
||||||
|
}
|
||||||
|
parts := strings.Split(relPath, "/")
|
||||||
|
if len(parts) >= 4 && parts[0] == "wiki" {
|
||||||
|
if wing == "" {
|
||||||
|
wing = parts[1]
|
||||||
|
}
|
||||||
|
if hall == "" && brain.IsValidHall(parts[2]) {
|
||||||
|
hall = parts[2]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return wing, hall
|
||||||
|
}
|
||||||
|
|
||||||
func extractTitle(content, filename string) string {
|
func extractTitle(content, filename string) string {
|
||||||
scanner := bufio.NewScanner(strings.NewReader(content))
|
scanner := bufio.NewScanner(strings.NewReader(content))
|
||||||
inFrontmatter := false
|
inFrontmatter := false
|
||||||
@@ -113,7 +336,6 @@ func extractTitle(content, filename string) string {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func excerpt(content string, maxLen int) string {
|
func excerpt(content string, maxLen int) string {
|
||||||
// Skip frontmatter, return first maxLen chars of body.
|
|
||||||
parts := strings.SplitN(content, "---", 3)
|
parts := strings.SplitN(content, "---", 3)
|
||||||
body := content
|
body := content
|
||||||
if len(parts) == 3 {
|
if len(parts) == 3 {
|
||||||
|
|||||||
@@ -2,6 +2,7 @@
|
|||||||
package search_test
|
package search_test
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
@@ -12,6 +13,99 @@ import (
|
|||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// stubEmbedder is an Embedder that ignores its input and always yields vec.
type stubEmbedder struct {
	vec []float32
}

// Embed returns the canned vector and never fails.
func (s stubEmbedder) Embed(_ context.Context, _ string) ([]float32, error) {
	return s.vec, nil
}
|
||||||
|
|
||||||
|
type stubVector struct{ hits []search.VectorHit }
|
||||||
|
|
||||||
|
func (s stubVector) Search(_ context.Context, _ []float32, _ int) ([]search.VectorHit, error) {
|
||||||
|
return s.hits, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSearch_HybridRRFPromotesVectorOnlyHit(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
for _, p := range []struct{ rel, body string }{
|
||||||
|
// BM25-keyword note (matches "lejpa" once)
|
||||||
|
{"wiki/jepa-fx/facts/foo.md", "---\ntitle: Foo\n---\nlejpa keyword\n"},
|
||||||
|
// Semantically related note that does NOT contain the keyword.
|
||||||
|
{"wiki/jepa-fx/facts/semantic.md", "---\ntitle: Semantic\n---\nNo keyword in body.\n"},
|
||||||
|
} {
|
||||||
|
full := filepath.Join(dir, p.rel)
|
||||||
|
require.NoError(t, os.MkdirAll(filepath.Dir(full), 0o755))
|
||||||
|
require.NoError(t, os.WriteFile(full, []byte(p.body), 0o644))
|
||||||
|
}
|
||||||
|
|
||||||
|
embedder := stubEmbedder{vec: []float32{0.1}}
|
||||||
|
vector := stubVector{hits: []search.VectorHit{
|
||||||
|
{Path: "wiki/jepa-fx/facts/semantic.md", Distance: 0.05}, // best vector match
|
||||||
|
{Path: "wiki/jepa-fx/facts/foo.md", Distance: 0.10},
|
||||||
|
}}
|
||||||
|
|
||||||
|
got, err := search.Query(dir, search.QueryOptions{
|
||||||
|
Query: "lejpa",
|
||||||
|
Limit: 5,
|
||||||
|
Vector: vector,
|
||||||
|
Embedder: embedder,
|
||||||
|
})
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.Len(t, got, 2, "vector-only hit should be hydrated into results")
|
||||||
|
paths := []string{got[0].Path, got[1].Path}
|
||||||
|
assert.Contains(t, paths, "wiki/jepa-fx/facts/foo.md")
|
||||||
|
assert.Contains(t, paths, "wiki/jepa-fx/facts/semantic.md")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSearch_HybridDedupesChunkPathsToParent(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
full := filepath.Join(dir, "knowledge", "long.md")
|
||||||
|
require.NoError(t, os.MkdirAll(filepath.Dir(full), 0o755))
|
||||||
|
// Body contains the BM25 keyword "alpaca" so hybridMerge actually runs
|
||||||
|
// (it only kicks in when BM25 returns at least one candidate).
|
||||||
|
require.NoError(t, os.WriteFile(full, []byte("---\ntitle: Long\n---\nalpaca content.\n"), 0o644))
|
||||||
|
|
||||||
|
embedder := stubEmbedder{vec: []float32{0.1}}
|
||||||
|
// Vector store returns three chunk-path hits all pointing at the same
|
||||||
|
// parent file. The merged result must surface ONE row per parent — not
|
||||||
|
// three rows with chunk-suffixed paths.
|
||||||
|
vector := stubVector{hits: []search.VectorHit{
|
||||||
|
{Path: "knowledge/long.md#0001", Distance: 0.05},
|
||||||
|
{Path: "knowledge/long.md#0002", Distance: 0.07},
|
||||||
|
{Path: "knowledge/long.md#0003", Distance: 0.09},
|
||||||
|
}}
|
||||||
|
|
||||||
|
got, err := search.Query(dir, search.QueryOptions{
|
||||||
|
Query: "alpaca",
|
||||||
|
Limit: 5,
|
||||||
|
Vector: vector,
|
||||||
|
Embedder: embedder,
|
||||||
|
})
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.Len(t, got, 1, "three chunk hits for one parent must merge to one result")
|
||||||
|
assert.Equal(t, "knowledge/long.md", got[0].Path)
|
||||||
|
assert.Equal(t, "Long", got[0].Title)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSearch_HybridFallsBackOnEmbedderError(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
require.NoError(t, os.MkdirAll(filepath.Join(dir, "wiki"), 0o755))
|
||||||
|
require.NoError(t, os.WriteFile(filepath.Join(dir, "wiki", "x.md"), []byte("keyword foo"), 0o644))
|
||||||
|
|
||||||
|
embedder := errorEmbedder{}
|
||||||
|
vector := stubVector{}
|
||||||
|
got, err := search.Query(dir, search.QueryOptions{
|
||||||
|
Query: "keyword", Limit: 5, Vector: vector, Embedder: embedder,
|
||||||
|
})
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.Len(t, got, 1, "BM25 result should still come back when embedder fails")
|
||||||
|
assert.Equal(t, "wiki/x.md", got[0].Path)
|
||||||
|
}
|
||||||
|
|
||||||
|
type errorEmbedder struct{}
|
||||||
|
|
||||||
|
func (errorEmbedder) Embed(_ context.Context, _ string) ([]float32, error) {
|
||||||
|
return nil, assert.AnError
|
||||||
|
}
|
||||||
|
|
||||||
func TestSearch_ReturnsMatchingPages(t *testing.T) {
|
func TestSearch_ReturnsMatchingPages(t *testing.T) {
|
||||||
dir := t.TempDir()
|
dir := t.TempDir()
|
||||||
require.NoError(t, os.MkdirAll(filepath.Join(dir, "knowledge"), 0o755))
|
require.NoError(t, os.MkdirAll(filepath.Join(dir, "knowledge"), 0o755))
|
||||||
@@ -27,7 +121,7 @@ func TestSearch_ReturnsMatchingPages(t *testing.T) {
|
|||||||
0o644,
|
0o644,
|
||||||
))
|
))
|
||||||
|
|
||||||
results, err := search.Query(dir, "retry transient", 5)
|
results, err := search.Query(dir, search.QueryOptions{Query: "retry transient", Limit: 5})
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
require.Len(t, results, 1)
|
require.Len(t, results, 1)
|
||||||
assert.Equal(t, "knowledge/retry-logic.md", results[0].Path)
|
assert.Equal(t, "knowledge/retry-logic.md", results[0].Path)
|
||||||
@@ -36,6 +130,49 @@ func TestSearch_ReturnsMatchingPages(t *testing.T) {
|
|||||||
assert.Contains(t, results[0].Excerpt, "Retry")
|
assert.Contains(t, results[0].Excerpt, "Retry")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestSearch_WingHallScoping(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
for _, p := range []struct{ rel, body string }{
|
||||||
|
{"wiki/jepa-fx/decisions/val-vol.md", "---\nwing: jepa-fx\nhall: decisions\n---\nval-vol-r2 keyword.\n"},
|
||||||
|
{"wiki/jepa-fx/facts/architecture.md", "---\nwing: jepa-fx\nhall: facts\n---\nval-vol-r2 keyword in facts.\n"},
|
||||||
|
{"wiki/hyperguild/decisions/routing.md", "---\nwing: hyperguild\nhall: decisions\n---\nval-vol-r2 reference.\n"},
|
||||||
|
{"knowledge/loose.md", "---\n---\nval-vol-r2 in knowledge.\n"},
|
||||||
|
} {
|
||||||
|
full := filepath.Join(dir, p.rel)
|
||||||
|
require.NoError(t, os.MkdirAll(filepath.Dir(full), 0o755))
|
||||||
|
require.NoError(t, os.WriteFile(full, []byte(p.body), 0o644))
|
||||||
|
}
|
||||||
|
|
||||||
|
// No filter: walk both knowledge/ and wiki/ — all 4 match.
|
||||||
|
got, err := search.Query(dir, search.QueryOptions{Query: "val-vol-r2", Limit: 10})
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Len(t, got, 4)
|
||||||
|
|
||||||
|
// Wing scope: 2 jepa-fx hits, no hyperguild, no knowledge.
|
||||||
|
got, err = search.Query(dir, search.QueryOptions{Query: "val-vol-r2", Limit: 10, Wing: "jepa-fx"})
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.Len(t, got, 2)
|
||||||
|
for _, r := range got {
|
||||||
|
assert.Equal(t, "jepa-fx", r.Wing)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wing+Hall scope: 1 hit.
|
||||||
|
got, err = search.Query(dir, search.QueryOptions{Query: "val-vol-r2", Limit: 10, Wing: "jepa-fx", Hall: "decisions"})
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.Len(t, got, 1)
|
||||||
|
assert.Equal(t, "jepa-fx", got[0].Wing)
|
||||||
|
assert.Equal(t, "decisions", got[0].Hall)
|
||||||
|
assert.Equal(t, "wiki/jepa-fx/decisions/val-vol.md", got[0].Path)
|
||||||
|
|
||||||
|
// Invalid hall rejected.
|
||||||
|
_, err = search.Query(dir, search.QueryOptions{Query: "x", Wing: "jepa-fx", Hall: "garbage"})
|
||||||
|
require.Error(t, err)
|
||||||
|
|
||||||
|
// Hall without wing rejected.
|
||||||
|
_, err = search.Query(dir, search.QueryOptions{Query: "x", Hall: "facts"})
|
||||||
|
require.Error(t, err)
|
||||||
|
}
|
||||||
|
|
||||||
func TestSearch_RespectsLimit(t *testing.T) {
|
func TestSearch_RespectsLimit(t *testing.T) {
|
||||||
dir := t.TempDir()
|
dir := t.TempDir()
|
||||||
require.NoError(t, os.MkdirAll(filepath.Join(dir, "knowledge"), 0o755))
|
require.NoError(t, os.MkdirAll(filepath.Join(dir, "knowledge"), 0o755))
|
||||||
@@ -46,7 +183,7 @@ func TestSearch_RespectsLimit(t *testing.T) {
|
|||||||
0o644,
|
0o644,
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
results, err := search.Query(dir, "retry", 3)
|
results, err := search.Query(dir, search.QueryOptions{Query: "retry", Limit: 3})
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
assert.LessOrEqual(t, len(results), 3)
|
assert.LessOrEqual(t, len(results), 3)
|
||||||
}
|
}
|
||||||
|
|||||||
137
ingestion/internal/vectorstore/chunk.go
Normal file
137
ingestion/internal/vectorstore/chunk.go
Normal file
@@ -0,0 +1,137 @@
|
|||||||
|
package vectorstore
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// NumberedChunk is one chunk of a document together with the key it is
// stored under in brain_embeddings. Path has the form "<parent>#NNNN", where
// NNNN is the chunk's 1-based index zero-padded to four digits.
type NumberedChunk struct {
	Path    string // storage key, "<parent>#NNNN"
	Content string // chunk body
}
|
||||||
|
|
||||||
|
// ParentPath returns p with a trailing "#NNNN" chunk suffix removed, where
// NNNN is one or more ASCII digits. Any other input is returned unchanged, so
// a "#" that belongs to a directory or file name (e.g. "wiki/c#/notes.md") is
// preserved instead of truncating the path at that point. Used by search to
// dedupe chunk-level hits back to a single document per result.
func ParentPath(p string) string {
	// Only the LAST "#" can start a chunk suffix; earlier ones are path text.
	i := strings.LastIndex(p, "#")
	if i < 0 || i == len(p)-1 {
		return p
	}
	for _, r := range p[i+1:] {
		if r < '0' || r > '9' {
			// Not a chunk index, so the "#" is part of the name itself.
			return p
		}
	}
	return p[:i]
}
|
||||||
|
|
||||||
|
// NumberChunks assigns "<parent>#NNNN" storage paths to a slice of chunk
|
||||||
|
// bodies, indexed from 0001. Empty chunks are dropped.
|
||||||
|
func NumberChunks(parent string, chunks []string) []NumberedChunk {
|
||||||
|
out := make([]NumberedChunk, 0, len(chunks))
|
||||||
|
idx := 1
|
||||||
|
for _, c := range chunks {
|
||||||
|
if strings.TrimSpace(c) == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
out = append(out, NumberedChunk{
|
||||||
|
Path: fmt.Sprintf("%s#%04d", parent, idx),
|
||||||
|
Content: c,
|
||||||
|
})
|
||||||
|
idx++
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// ChunkMarkdown splits a markdown document into embedding-sized pieces.
|
||||||
|
// Strategy:
|
||||||
|
// 1. Split at H1/H2 headings (top-of-line "#" or "##"). The intro before
|
||||||
|
// the first heading is its own chunk.
|
||||||
|
// 2. Any section larger than maxBytes is further split at paragraph
|
||||||
|
// boundaries (blank lines), packing paragraphs greedily under the
|
||||||
|
// byte budget.
|
||||||
|
//
|
||||||
|
// The function aims for "fits comfortably under nomic-embed-text's 2048-
|
||||||
|
// token context" — at ~4 chars/token for English markdown, maxBytes ≈ 4000
|
||||||
|
// is a safe call-site default.
|
||||||
|
func ChunkMarkdown(content string, maxBytes int) []string {
|
||||||
|
if maxBytes <= 0 {
|
||||||
|
maxBytes = 4000
|
||||||
|
}
|
||||||
|
sections := splitAtHeadings(content)
|
||||||
|
|
||||||
|
out := make([]string, 0, len(sections))
|
||||||
|
for _, s := range sections {
|
||||||
|
if len(s) <= maxBytes {
|
||||||
|
out = append(out, s)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
out = append(out, splitAtParagraphs(s, maxBytes)...)
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// splitAtHeadings cuts content into sections that each start with a "# " or
// "## " heading line; text before the first heading is the leading section.
//
// Heading detection is suspended inside fenced code blocks (``` or ~~~), so
// a "# comment" line in a shell or Python snippet does not split the snippet
// across two sections. An unclosed fence runs to the end of the document.
//
// Each returned section has its trailing newlines collapsed to exactly one.
// Sections that contain only whitespace are dropped.
func splitAtHeadings(content string) []string {
	var sections []string
	var cur strings.Builder

	flush := func() {
		if cur.Len() == 0 {
			return
		}
		// Trim trailing newlines, then re-add a single one, so a
		// single-paragraph file round-trips to its original content rather
		// than accumulating extra newlines from the empty-line split.
		s := strings.TrimRight(cur.String(), "\n")
		cur.Reset()
		if strings.TrimSpace(s) == "" {
			// Nothing worth embedding (e.g. blank lines before the first
			// heading, or the empty tail left by a final "\n").
			return
		}
		sections = append(sections, s+"\n")
	}

	fence := "" // opening fence marker while inside a code block, else ""
	for _, ln := range strings.Split(content, "\n") {
		trimmed := strings.TrimLeft(ln, " ")

		switch {
		case fence == "" && (strings.HasPrefix(trimmed, "```") || strings.HasPrefix(trimmed, "~~~")):
			fence = trimmed[:3]
		case fence != "" && strings.HasPrefix(trimmed, fence):
			// Only the same marker type closes the block.
			fence = ""
		case fence == "":
			isH := strings.HasPrefix(trimmed, "# ") || strings.HasPrefix(trimmed, "## ")
			if isH {
				flush()
			}
		}

		cur.WriteString(ln)
		cur.WriteByte('\n')
	}
	flush()
	return sections
}
|
||||||
|
|
||||||
|
// splitAtParagraphs greedily packs the blank-line separated paragraphs of
// section into sub-chunks of at most maxBytes, rejoining packed paragraphs
// with "\n\n". A single paragraph larger than maxBytes is emitted whole as
// one over-budget chunk; it is never cut mid-sentence. Empty paragraphs
// (from runs of blank lines) are skipped.
func splitAtParagraphs(section string, maxBytes int) []string {
	var chunks []string
	pending := "" // paragraphs packed so far for the chunk being built

	for _, para := range strings.Split(section, "\n\n") {
		if len(para) == 0 {
			continue
		}
		switch {
		case pending == "":
			pending = para
		case len(pending)+2+len(para) > maxBytes:
			// Appending (plus the 2-byte "\n\n" joiner) would bust the
			// budget: close the current chunk and start a new one.
			chunks = append(chunks, pending)
			pending = para
		default:
			pending += "\n\n" + para
		}
	}
	if pending != "" {
		chunks = append(chunks, pending)
	}
	return chunks
}
|
||||||
72
ingestion/internal/vectorstore/chunk_test.go
Normal file
72
ingestion/internal/vectorstore/chunk_test.go
Normal file
@@ -0,0 +1,72 @@
|
|||||||
|
package vectorstore_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/vectorstore"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestChunkMarkdown_ShortFileFitsInOne(t *testing.T) {
|
||||||
|
out := vectorstore.ChunkMarkdown("Just a short paragraph.\n", 4000)
|
||||||
|
require.Len(t, out, 1)
|
||||||
|
assert.Equal(t, "Just a short paragraph.\n", out[0])
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestChunkMarkdown_SplitsAtHeadings(t *testing.T) {
|
||||||
|
src := "# Top\n\nintro\n\n## A\n\nbody a\n\n## B\n\nbody b\n"
|
||||||
|
out := vectorstore.ChunkMarkdown(src, 50) // tiny limit forces per-section split
|
||||||
|
|
||||||
|
assert.GreaterOrEqual(t, len(out), 2, "should split at H2 boundaries")
|
||||||
|
// Each chunk should start with a heading (top-level intro chunk OK without one)
|
||||||
|
for i, c := range out {
|
||||||
|
if i == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
assert.True(t, strings.HasPrefix(strings.TrimSpace(c), "#"),
|
||||||
|
"non-first chunk %d should start with heading: %q", i, c)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestChunkMarkdown_FurtherSplitsOversizedSection(t *testing.T) {
|
||||||
|
// One H2 section with 4 paragraphs of ~80 chars each, limit 100.
|
||||||
|
src := "## big\n\n" +
|
||||||
|
strings.Repeat("paragraph one is moderately long.\n\n", 1) +
|
||||||
|
strings.Repeat("paragraph two also moderately long.\n\n", 1) +
|
||||||
|
strings.Repeat("paragraph three is moderately long.\n\n", 1) +
|
||||||
|
strings.Repeat("paragraph four is moderately long.\n\n", 1)
|
||||||
|
out := vectorstore.ChunkMarkdown(src, 100)
|
||||||
|
|
||||||
|
assert.Greater(t, len(out), 1, "oversized section should sub-split at paragraph boundaries")
|
||||||
|
for i, c := range out {
|
||||||
|
assert.LessOrEqual(t, len(c), 200,
|
||||||
|
"chunk %d exceeds 2x maxBytes: %d", i, len(c))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestChunkMarkdown_PreservesContent(t *testing.T) {
|
||||||
|
src := "# H1\n\nfirst section body.\n\n## H2a\n\nsecond section body.\n\n## H2b\n\nthird section body.\n"
|
||||||
|
out := vectorstore.ChunkMarkdown(src, 50)
|
||||||
|
joined := strings.Join(out, "")
|
||||||
|
// All non-whitespace tokens from src must appear in the joined output
|
||||||
|
for _, token := range []string{"H1", "first", "H2a", "second", "H2b", "third"} {
|
||||||
|
assert.Contains(t, joined, token, "token %q missing after chunking", token)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestChunkMarkdown_NumberedSuffix(t *testing.T) {
|
||||||
|
out := vectorstore.NumberChunks("knowledge/foo.md", []string{"a", "b", "c"})
|
||||||
|
require.Len(t, out, 3)
|
||||||
|
assert.Equal(t, "knowledge/foo.md#0001", out[0].Path)
|
||||||
|
assert.Equal(t, "knowledge/foo.md#0002", out[1].Path)
|
||||||
|
assert.Equal(t, "knowledge/foo.md#0003", out[2].Path)
|
||||||
|
assert.Equal(t, "a", out[0].Content)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParentPath_StripsChunkSuffix(t *testing.T) {
|
||||||
|
assert.Equal(t, "knowledge/foo.md", vectorstore.ParentPath("knowledge/foo.md#0001"))
|
||||||
|
assert.Equal(t, "knowledge/foo.md", vectorstore.ParentPath("knowledge/foo.md"))
|
||||||
|
assert.Equal(t, "wiki/a/b.md", vectorstore.ParentPath("wiki/a/b.md#9999"))
|
||||||
|
}
|
||||||
155
ingestion/internal/vectorstore/pg.go
Normal file
155
ingestion/internal/vectorstore/pg.go
Normal file
@@ -0,0 +1,155 @@
|
|||||||
|
// Package vectorstore stores brain note embeddings in pgvector on the
|
||||||
|
// shared postgres18 instance. One row per chunk path ("<file>#NNNN"), cosine-distance
|
||||||
|
// indexed via HNSW for sub-millisecond top-k retrieval.
|
||||||
|
package vectorstore
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/jackc/pgx/v5"
|
||||||
|
"github.com/jackc/pgx/v5/pgxpool"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Hit is one row returned by a cosine-distance search.
type Hit struct {
	Path     string  // stored path of the matching row
	Distance float64 // cosine distance: 0 = identical, 2 = opposite
}
|
||||||
|
|
||||||
|
// PGStore is a pgvector-backed embeddings store. Construct with New and
|
||||||
|
// call Init once to create the table + HNSW index. Use Close to release
|
||||||
|
// the underlying pool.
|
||||||
|
type PGStore struct {
|
||||||
|
pool *pgxpool.Pool
|
||||||
|
}
|
||||||
|
|
||||||
|
// New opens a connection pool against dsn (a libpq-style URL). Caller
|
||||||
|
// owns the resulting *PGStore and must invoke Close.
|
||||||
|
func New(ctx context.Context, dsn string) (*PGStore, error) {
|
||||||
|
pool, err := pgxpool.New(ctx, dsn)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("pgxpool: %w", err)
|
||||||
|
}
|
||||||
|
if err := pool.Ping(ctx); err != nil {
|
||||||
|
pool.Close()
|
||||||
|
return nil, fmt.Errorf("ping: %w", err)
|
||||||
|
}
|
||||||
|
return &PGStore{pool: pool}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Close releases the underlying connection pool.
|
||||||
|
func (s *PGStore) Close() {
|
||||||
|
if s.pool != nil {
|
||||||
|
s.pool.Close()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Init creates the brain_embeddings table and its HNSW index if they
|
||||||
|
// don't already exist. Safe to call on every startup. Assumes the
|
||||||
|
// `vector` extension is already installed (one-time DBA setup; see
|
||||||
|
// scripts/brain-embeddings-init.sql).
|
||||||
|
func (s *PGStore) Init(ctx context.Context) error {
|
||||||
|
const ddl = `
|
||||||
|
CREATE TABLE IF NOT EXISTS brain_embeddings (
|
||||||
|
path TEXT PRIMARY KEY,
|
||||||
|
embedding vector(768) NOT NULL,
|
||||||
|
updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
|
||||||
|
);
|
||||||
|
CREATE INDEX IF NOT EXISTS brain_embeddings_embedding_idx
|
||||||
|
ON brain_embeddings USING hnsw (embedding vector_cosine_ops);
|
||||||
|
`
|
||||||
|
_, err := s.pool.Exec(ctx, ddl)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Upsert inserts or replaces the embedding for path. Embedding must be
|
||||||
|
// 768-dim (nomic-embed-text). Caller is responsible for normalising
|
||||||
|
// paths to forward-slash form.
|
||||||
|
func (s *PGStore) Upsert(ctx context.Context, path string, embedding []float32) error {
|
||||||
|
if len(embedding) != 768 {
|
||||||
|
return fmt.Errorf("expected 768-dim embedding, got %d", len(embedding))
|
||||||
|
}
|
||||||
|
_, err := s.pool.Exec(ctx, `
|
||||||
|
INSERT INTO brain_embeddings (path, embedding, updated_at)
|
||||||
|
VALUES ($1, $2, now())
|
||||||
|
ON CONFLICT (path) DO UPDATE
|
||||||
|
SET embedding = EXCLUDED.embedding, updated_at = now()
|
||||||
|
`, path, vectorLiteral(embedding))
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Delete removes the row at path. No-op when the row doesn't exist.
|
||||||
|
func (s *PGStore) Delete(ctx context.Context, path string) error {
|
||||||
|
_, err := s.pool.Exec(ctx, `DELETE FROM brain_embeddings WHERE path = $1`, path)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Search returns the top-limit nearest paths by cosine distance.
|
||||||
|
func (s *PGStore) Search(ctx context.Context, query []float32, limit int) ([]Hit, error) {
|
||||||
|
if len(query) != 768 {
|
||||||
|
return nil, fmt.Errorf("expected 768-dim query, got %d", len(query))
|
||||||
|
}
|
||||||
|
if limit <= 0 {
|
||||||
|
limit = 10
|
||||||
|
}
|
||||||
|
rows, err := s.pool.Query(ctx, `
|
||||||
|
SELECT path, embedding <=> $1 AS distance
|
||||||
|
FROM brain_embeddings
|
||||||
|
ORDER BY embedding <=> $1
|
||||||
|
LIMIT $2
|
||||||
|
`, vectorLiteral(query), limit)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("query: %w", err)
|
||||||
|
}
|
||||||
|
defer rows.Close()
|
||||||
|
|
||||||
|
var hits []Hit
|
||||||
|
for rows.Next() {
|
||||||
|
var h Hit
|
||||||
|
if err := rows.Scan(&h.Path, &h.Distance); err != nil {
|
||||||
|
return nil, fmt.Errorf("scan: %w", err)
|
||||||
|
}
|
||||||
|
hits = append(hits, h)
|
||||||
|
}
|
||||||
|
if err := rows.Err(); err != nil && !errors.Is(err, pgx.ErrNoRows) {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return hits, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// KnownPaths returns the path set already present in the store. Used by
|
||||||
|
// the watcher to diff against the wiki/ tree and decide what to upsert.
|
||||||
|
func (s *PGStore) KnownPaths(ctx context.Context) (map[string]struct{}, error) {
|
||||||
|
rows, err := s.pool.Query(ctx, `SELECT path FROM brain_embeddings`)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("query paths: %w", err)
|
||||||
|
}
|
||||||
|
defer rows.Close()
|
||||||
|
out := make(map[string]struct{})
|
||||||
|
for rows.Next() {
|
||||||
|
var p string
|
||||||
|
if err := rows.Scan(&p); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
out[p] = struct{}{}
|
||||||
|
}
|
||||||
|
return out, rows.Err()
|
||||||
|
}
|
||||||
|
|
||||||
|
// vectorLiteral formats v as the text form pgvector accepts for a vector
// parameter: "[v1,v2,...,vN]", each component printed with %g. An empty or
// nil slice yields "[]".
func vectorLiteral(v []float32) string {
	parts := make([]string, len(v))
	for i, x := range v {
		parts[i] = fmt.Sprintf("%g", x)
	}
	return "[" + strings.Join(parts, ",") + "]"
}
|
||||||
91
ingestion/internal/vectorstore/pg_test.go
Normal file
91
ingestion/internal/vectorstore/pg_test.go
Normal file
@@ -0,0 +1,91 @@
|
|||||||
|
package vectorstore_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"os"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/vectorstore"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
// dsn returns the DSN for the pgvector integration tests, which run against
// a real postgres18 + pgvector. It is read from BRAIN_PG_TEST_DSN; when that
// is unset the calling test is skipped, so `task check` stays hermetic on
// hosts without a reachable database.
//
// To run:
//
//	BRAIN_PG_TEST_DSN='postgres://brain_app:pwd@127.0.0.1:5432/brain' \
//	  go test ./internal/vectorstore/... -run Integration
func dsn(t *testing.T) string {
	t.Helper()
	value, present := os.LookupEnv("BRAIN_PG_TEST_DSN")
	if !present || value == "" {
		t.Skip("BRAIN_PG_TEST_DSN not set; skipping pgvector integration tests")
	}
	return value
}
|
||||||
|
|
||||||
|
func freshStore(t *testing.T) (*vectorstore.PGStore, context.Context) {
|
||||||
|
t.Helper()
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||||
|
t.Cleanup(cancel)
|
||||||
|
s, err := vectorstore.New(ctx, dsn(t))
|
||||||
|
require.NoError(t, err)
|
||||||
|
t.Cleanup(s.Close)
|
||||||
|
require.NoError(t, s.Init(ctx))
|
||||||
|
// Clean slate per test.
|
||||||
|
_, _ = s.KnownPaths(ctx)
|
||||||
|
require.NoError(t, s.Delete(ctx, "%test-fixture%"))
|
||||||
|
return s, ctx
|
||||||
|
}
|
||||||
|
|
||||||
|
// vec builds a dim-length vector with every component set to fill.
func vec(dim int, fill float32) []float32 {
	out := make([]float32, 0, dim)
	for len(out) < dim {
		out = append(out, fill)
	}
	return out
}
|
||||||
|
|
||||||
|
func TestIntegration_UpsertAndSearch(t *testing.T) {
|
||||||
|
s, ctx := freshStore(t)
|
||||||
|
|
||||||
|
require.NoError(t, s.Upsert(ctx, "wiki/a.md", vec(768, 1.0)))
|
||||||
|
require.NoError(t, s.Upsert(ctx, "wiki/b.md", vec(768, -1.0)))
|
||||||
|
|
||||||
|
hits, err := s.Search(ctx, vec(768, 1.0), 2)
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.GreaterOrEqual(t, len(hits), 1)
|
||||||
|
assert.Equal(t, "wiki/a.md", hits[0].Path)
|
||||||
|
assert.InDelta(t, 0.0, hits[0].Distance, 1e-5)
|
||||||
|
|
||||||
|
t.Cleanup(func() {
|
||||||
|
_ = s.Delete(ctx, "wiki/a.md")
|
||||||
|
_ = s.Delete(ctx, "wiki/b.md")
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestIntegration_KnownPaths(t *testing.T) {
|
||||||
|
s, ctx := freshStore(t)
|
||||||
|
require.NoError(t, s.Upsert(ctx, "wiki/k.md", vec(768, 0.5)))
|
||||||
|
t.Cleanup(func() { _ = s.Delete(ctx, "wiki/k.md") })
|
||||||
|
|
||||||
|
paths, err := s.KnownPaths(ctx)
|
||||||
|
require.NoError(t, err)
|
||||||
|
_, ok := paths["wiki/k.md"]
|
||||||
|
assert.True(t, ok)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestUpsert_RejectsWrongDimension(t *testing.T) {
|
||||||
|
s := &vectorstore.PGStore{}
|
||||||
|
err := s.Upsert(context.Background(), "x", vec(100, 0))
|
||||||
|
require.Error(t, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSearch_RejectsWrongDimension(t *testing.T) {
|
||||||
|
s := &vectorstore.PGStore{}
|
||||||
|
_, err := s.Search(context.Background(), vec(100, 0), 5)
|
||||||
|
require.Error(t, err)
|
||||||
|
}
|
||||||
170
ingestion/internal/vectorstore/sync.go
Normal file
170
ingestion/internal/vectorstore/sync.go
Normal file
@@ -0,0 +1,170 @@
|
|||||||
|
package vectorstore
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"log/slog"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Embedder turns text into a dense vector. It is declared here, rather than
// imported from the embed package whose Client satisfies it, so that
// vectorstore does not depend on embed.
type Embedder interface {
	// Embed returns the embedding of text.
	Embed(ctx context.Context, text string) ([]float32, error)
}
|
||||||
|
|
||||||
|
// Store is the part of PGStore that Sync relies on, expressed as an
// interface so tests can substitute a stub.
type Store interface {
	// KnownPaths returns the set of paths currently stored.
	KnownPaths(ctx context.Context) (map[string]struct{}, error)
	// Upsert inserts or replaces the embedding stored at path.
	Upsert(ctx context.Context, path string, embedding []float32) error
	// Delete removes the row stored at path.
	Delete(ctx context.Context, path string) error
}
|
||||||
|
|
||||||
|
// SyncResult summarises one Sync run. It exists for logs and metrics;
// callers generally don't act on the fields directly.
type SyncResult struct {
	Added   int     // chunk rows upserted for files new to the store
	Updated int     // NOTE(review): no visible code path increments this yet
	Deleted int     // chunk rows removed because their parent file is gone
	Errors  []error // per-item failures; these do not abort the run
}
|
||||||
|
|
||||||
|
// scanDirs lists the subdirectories of brainDir whose .md files are embedded
// for vector retrieval: wiki/ holds LLM-extracted entity and source pages,
// knowledge/ holds curated hand-written entries.
var scanDirs = []string{"wiki", "knowledge"}
|
||||||
|
|
||||||
|
// maxChunkBytes is the byte budget per chunk handed to ChunkMarkdown.
// nomic-embed-text's default context is 2048 tokens, which at ~4 chars/token
// for English markdown is a ceiling of roughly 8 KB. Half of that is used to
// leave headroom for unicode, code blocks, and tokenizer variance.
const maxChunkBytes = 4000
|
||||||
|
|
||||||
|
// Sync brings the embedding store in line with brain/{wiki,knowledge}/
|
||||||
|
// on disk:
|
||||||
|
// - new files (in the tree, not in the store) get embedded + upserted
|
||||||
|
// - files whose mtime exceeds the store's updated_at get re-embedded
|
||||||
|
// - files no longer on disk get deleted from the store
|
||||||
|
//
|
||||||
|
// Designed to be called on a ticker. Best-effort: per-file errors are
|
||||||
|
// collected into SyncResult.Errors and do not abort the run.
|
||||||
|
func Sync(ctx context.Context, brainDir string, store Store, embedder Embedder) (SyncResult, error) {
|
||||||
|
var res SyncResult
|
||||||
|
if store == nil || embedder == nil {
|
||||||
|
return res, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
known, err := store.KnownPaths(ctx)
|
||||||
|
if err != nil {
|
||||||
|
return res, fmt.Errorf("known paths: %w", err)
|
||||||
|
}
|
||||||
|
// Build a parent → "any chunk known?" set so we can skip files that
|
||||||
|
// already have at least one chunk row in the store.
|
||||||
|
knownParents := make(map[string]struct{}, len(known))
|
||||||
|
for p := range known {
|
||||||
|
knownParents[ParentPath(p)] = struct{}{}
|
||||||
|
}
|
||||||
|
seenParents := make(map[string]struct{})
|
||||||
|
|
||||||
|
for _, sub := range scanDirs {
|
||||||
|
root := filepath.Join(brainDir, sub)
|
||||||
|
if _, err := os.Stat(root); os.IsNotExist(err) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
err = filepath.WalkDir(root, func(path string, d os.DirEntry, err error) error {
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if d.IsDir() || !strings.HasSuffix(path, ".md") || d.Name() == "_index.md" {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
rel, err := filepath.Rel(brainDir, path)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
relSlash := filepath.ToSlash(rel)
|
||||||
|
seenParents[relSlash] = struct{}{}
|
||||||
|
|
||||||
|
if _, ok := knownParents[relSlash]; ok {
|
||||||
|
// File has at least one chunk in the store already.
|
||||||
|
// TODO: compare mtime once Store exposes updated_at so we
|
||||||
|
// re-embed on edit. For now, skip.
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
content, readErr := os.ReadFile(path)
|
||||||
|
if readErr != nil {
|
||||||
|
res.Errors = append(res.Errors, fmt.Errorf("read %s: %w", relSlash, readErr))
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
chunks := NumberChunks(relSlash, ChunkMarkdown(string(content), maxChunkBytes))
|
||||||
|
for _, ch := range chunks {
|
||||||
|
vec, embErr := embedder.Embed(ctx, ch.Content)
|
||||||
|
if embErr != nil {
|
||||||
|
res.Errors = append(res.Errors, fmt.Errorf("embed %s: %w", ch.Path, embErr))
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if upErr := store.Upsert(ctx, ch.Path, vec); upErr != nil {
|
||||||
|
res.Errors = append(res.Errors, fmt.Errorf("upsert %s: %w", ch.Path, upErr))
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
res.Added++
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return res, fmt.Errorf("walk %s: %w", sub, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Drop chunk rows whose parent file is gone.
|
||||||
|
for path := range known {
|
||||||
|
if _, ok := seenParents[ParentPath(path)]; ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if err := store.Delete(ctx, path); err != nil {
|
||||||
|
res.Errors = append(res.Errors, fmt.Errorf("delete %s: %w", path, err))
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
res.Deleted++
|
||||||
|
}
|
||||||
|
return res, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// StartSync launches Sync on a ticker in a background goroutine. The
|
||||||
|
// goroutine exits when ctx is cancelled. Failures are logged via slog.
|
||||||
|
func StartSync(ctx context.Context, brainDir string, store Store, embedder Embedder, interval time.Duration) {
|
||||||
|
if interval <= 0 {
|
||||||
|
interval = 5 * time.Minute
|
||||||
|
}
|
||||||
|
go func() {
|
||||||
|
t := time.NewTicker(interval)
|
||||||
|
defer t.Stop()
|
||||||
|
// Run once immediately so first-boot doesn't wait a full tick.
|
||||||
|
if r, err := Sync(ctx, brainDir, store, embedder); err != nil {
|
||||||
|
slog.Error("embed sync failed", "err", err)
|
||||||
|
} else if r.Added+r.Deleted > 0 || len(r.Errors) > 0 {
|
||||||
|
slog.Info("embed sync", "added", r.Added, "deleted", r.Deleted, "errors", len(r.Errors))
|
||||||
|
for _, e := range r.Errors {
|
||||||
|
slog.Warn("embed sync item failed", "err", e)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
return
|
||||||
|
case <-t.C:
|
||||||
|
if r, err := Sync(ctx, brainDir, store, embedder); err != nil {
|
||||||
|
slog.Error("embed sync failed", "err", err)
|
||||||
|
} else if r.Added+r.Deleted > 0 || len(r.Errors) > 0 {
|
||||||
|
slog.Info("embed sync", "added", r.Added, "deleted", r.Deleted, "errors", len(r.Errors))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
}
|
||||||
219
ingestion/internal/vectorstore/sync_test.go
Normal file
219
ingestion/internal/vectorstore/sync_test.go
Normal file
@@ -0,0 +1,219 @@
|
|||||||
|
package vectorstore_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"errors"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/mathiasbq/hyperguild/ingestion/internal/vectorstore"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
type stubStore struct {
|
||||||
|
known map[string]struct{}
|
||||||
|
upserts map[string][]float32
|
||||||
|
deletes []string
|
||||||
|
failNext error
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *stubStore) KnownPaths(_ context.Context) (map[string]struct{}, error) {
|
||||||
|
out := make(map[string]struct{}, len(s.known))
|
||||||
|
for k := range s.known {
|
||||||
|
out[k] = struct{}{}
|
||||||
|
}
|
||||||
|
return out, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *stubStore) Upsert(_ context.Context, path string, v []float32) error {
|
||||||
|
if s.failNext != nil {
|
||||||
|
err := s.failNext
|
||||||
|
s.failNext = nil
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if s.upserts == nil {
|
||||||
|
s.upserts = make(map[string][]float32)
|
||||||
|
}
|
||||||
|
s.upserts[path] = v
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *stubStore) Delete(_ context.Context, path string) error {
|
||||||
|
s.deletes = append(s.deletes, path)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
type stubEmbedder struct {
|
||||||
|
vec []float32
|
||||||
|
err error
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e stubEmbedder) Embed(_ context.Context, _ string) ([]float32, error) {
|
||||||
|
return e.vec, e.err
|
||||||
|
}
|
||||||
|
|
||||||
|
func writeNote(t *testing.T, dir, rel, body string) {
|
||||||
|
t.Helper()
|
||||||
|
full := filepath.Join(dir, rel)
|
||||||
|
require.NoError(t, os.MkdirAll(filepath.Dir(full), 0o755))
|
||||||
|
require.NoError(t, os.WriteFile(full, []byte(body), 0o644))
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSync_AddsNewFiles(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
writeNote(t, dir, "wiki/jepa-fx/facts/x.md", "body of x")
|
||||||
|
writeNote(t, dir, "wiki/jepa-fx/facts/y.md", "body of y")
|
||||||
|
|
||||||
|
store := &stubStore{known: map[string]struct{}{}}
|
||||||
|
emb := stubEmbedder{vec: make([]float32, 768)}
|
||||||
|
res, err := vectorstore.Sync(context.Background(), dir, store, emb)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, 2, res.Added)
|
||||||
|
assert.Empty(t, res.Deleted)
|
||||||
|
assert.Contains(t, store.upserts, "wiki/jepa-fx/facts/x.md#0001")
|
||||||
|
assert.Contains(t, store.upserts, "wiki/jepa-fx/facts/y.md#0001")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSync_SkipsAlreadyKnown(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
writeNote(t, dir, "wiki/a/facts/x.md", "x")
|
||||||
|
|
||||||
|
store := &stubStore{known: map[string]struct{}{"wiki/a/facts/x.md#0001": {}}}
|
||||||
|
emb := stubEmbedder{vec: make([]float32, 768)}
|
||||||
|
res, err := vectorstore.Sync(context.Background(), dir, store, emb)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, 0, res.Added)
|
||||||
|
assert.Empty(t, store.upserts)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSync_DeletesDisappearedFiles(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
require.NoError(t, os.MkdirAll(filepath.Join(dir, "wiki"), 0o755))
|
||||||
|
// store has a path that doesn't exist on disk anymore
|
||||||
|
store := &stubStore{known: map[string]struct{}{"wiki/old/facts/ghost.md#0001": {}}}
|
||||||
|
res, err := vectorstore.Sync(context.Background(), dir, &stubStoreWithDelete{stubStore: store}, stubEmbedder{vec: make([]float32, 768)})
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, 1, res.Deleted)
|
||||||
|
}
|
||||||
|
|
||||||
|
// stubStoreWithDelete is a thin wrapper to capture Delete calls;
|
||||||
|
// stubStore already implements Delete but we need the wrapper to mix
|
||||||
|
// store interfaces with sync-specific expectations.
|
||||||
|
type stubStoreWithDelete struct {
|
||||||
|
*stubStore
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSync_SkipsIndexFiles(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
writeNote(t, dir, "wiki/a/_index.md", "moc")
|
||||||
|
writeNote(t, dir, "wiki/a/facts/real.md", "body")
|
||||||
|
|
||||||
|
store := &stubStore{known: map[string]struct{}{}}
|
||||||
|
res, err := vectorstore.Sync(context.Background(), dir, store, stubEmbedder{vec: make([]float32, 768)})
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, 1, res.Added)
|
||||||
|
assert.NotContains(t, store.upserts, "wiki/a/_index.md#0001")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSync_ScansKnowledgeDir(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
writeNote(t, dir, "wiki/a/facts/x.md", "x")
|
||||||
|
writeNote(t, dir, "knowledge/2026-05-19-koala-gpu-setup.md", "knowledge body")
|
||||||
|
|
||||||
|
store := &stubStore{known: map[string]struct{}{}}
|
||||||
|
emb := stubEmbedder{vec: make([]float32, 768)}
|
||||||
|
res, err := vectorstore.Sync(context.Background(), dir, store, emb)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, 2, res.Added)
|
||||||
|
assert.Contains(t, store.upserts, "wiki/a/facts/x.md#0001")
|
||||||
|
assert.Contains(t, store.upserts, "knowledge/2026-05-19-koala-gpu-setup.md#0001")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSync_ChunksLongFiles(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
// Build a file that's well over the chunk byte budget. Multi-section
|
||||||
|
// markdown so the chunker has heading boundaries to cut on.
|
||||||
|
body := "# Doc\n\nintro line.\n\n"
|
||||||
|
for i := 0; i < 10; i++ {
|
||||||
|
body += "## Section " + string(rune('A'+i)) + "\n\n"
|
||||||
|
body += strings.Repeat("This section has a fair amount of content. ", 50) + "\n\n"
|
||||||
|
}
|
||||||
|
writeNote(t, dir, "knowledge/long.md", body)
|
||||||
|
|
||||||
|
store := &stubStore{known: map[string]struct{}{}}
|
||||||
|
emb := stubEmbedder{vec: make([]float32, 768)}
|
||||||
|
res, err := vectorstore.Sync(context.Background(), dir, store, emb)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Greater(t, res.Added, 1, "long file should produce multiple chunk rows")
|
||||||
|
// Every upserted path for this file must be a chunk path.
|
||||||
|
chunkCount := 0
|
||||||
|
for p := range store.upserts {
|
||||||
|
if strings.HasPrefix(p, "knowledge/long.md#") {
|
||||||
|
chunkCount++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assert.Equal(t, res.Added, chunkCount, "all rows for long file should be chunk-suffixed")
|
||||||
|
// The bare parent path must NOT be upserted directly.
|
||||||
|
assert.NotContains(t, store.upserts, "knowledge/long.md")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSync_ShortFileGetsSingleChunkRow(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
writeNote(t, dir, "wiki/short.md", "tiny body\n")
|
||||||
|
|
||||||
|
store := &stubStore{known: map[string]struct{}{}}
|
||||||
|
emb := stubEmbedder{vec: make([]float32, 768)}
|
||||||
|
res, err := vectorstore.Sync(context.Background(), dir, store, emb)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, 1, res.Added)
|
||||||
|
assert.Contains(t, store.upserts, "wiki/short.md#0001")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSync_SkipsFileIfAnyChunkAlreadyKnown(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
writeNote(t, dir, "wiki/foo.md", "body\n")
|
||||||
|
|
||||||
|
store := &stubStore{known: map[string]struct{}{
|
||||||
|
"wiki/foo.md#0001": {},
|
||||||
|
}}
|
||||||
|
emb := stubEmbedder{vec: make([]float32, 768)}
|
||||||
|
res, err := vectorstore.Sync(context.Background(), dir, store, emb)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, 0, res.Added)
|
||||||
|
assert.Empty(t, store.upserts)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSync_DeletesAllChunksOfDisappearedFile(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
require.NoError(t, os.MkdirAll(filepath.Join(dir, "wiki"), 0o755))
|
||||||
|
store := &stubStore{known: map[string]struct{}{
|
||||||
|
"wiki/ghost.md#0001": {},
|
||||||
|
"wiki/ghost.md#0002": {},
|
||||||
|
"wiki/ghost.md#0003": {},
|
||||||
|
}}
|
||||||
|
res, err := vectorstore.Sync(context.Background(), dir, store, stubEmbedder{vec: make([]float32, 768)})
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, 3, res.Deleted)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSync_NoOpWhenComponentsNil(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
writeNote(t, dir, "wiki/a/facts/x.md", "x")
|
||||||
|
res, err := vectorstore.Sync(context.Background(), dir, nil, nil)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, 0, res.Added)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSync_CollectsEmbedderErrors(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
writeNote(t, dir, "wiki/a/facts/x.md", "x")
|
||||||
|
store := &stubStore{known: map[string]struct{}{}}
|
||||||
|
emb := stubEmbedder{err: errors.New("upstream down")}
|
||||||
|
res, err := vectorstore.Sync(context.Background(), dir, store, emb)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, 0, res.Added)
|
||||||
|
assert.Len(t, res.Errors, 1)
|
||||||
|
}
|
||||||
84
internal/auth/jwt.go
Normal file
84
internal/auth/jwt.go
Normal file
@@ -0,0 +1,84 @@
|
|||||||
|
package auth
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"net/http"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/lestrrat-go/jwx/v2/jwk"
|
||||||
|
"github.com/lestrrat-go/jwx/v2/jwt"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Validator validates Bearer JWTs issued by a Dex (OIDC) authorization server.
|
||||||
|
// Audience is optional; leave empty to skip audience validation.
|
||||||
|
type Validator struct {
|
||||||
|
issuer string
|
||||||
|
audience string
|
||||||
|
jwksURI string
|
||||||
|
cache *jwk.Cache
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewValidator fetches the OIDC discovery document from issuerURL, extracts
|
||||||
|
// jwks_uri, seeds the JWKS cache, and returns a ready Validator.
|
||||||
|
// If DEX_ISSUER_URL is not set the caller should pass "" and skip construction.
|
||||||
|
func NewValidator(issuerURL, audience string) (*Validator, error) {
|
||||||
|
resp, err := http.Get(issuerURL + "/.well-known/openid-configuration") //nolint:noctx
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("fetch oidc discovery: %w", err)
|
||||||
|
}
|
||||||
|
defer resp.Body.Close() //nolint:errcheck
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
return nil, fmt.Errorf("oidc discovery: status %d", resp.StatusCode)
|
||||||
|
}
|
||||||
|
|
||||||
|
var doc struct {
|
||||||
|
JWKSURI string `json:"jwks_uri"`
|
||||||
|
}
|
||||||
|
if err := json.NewDecoder(resp.Body).Decode(&doc); err != nil {
|
||||||
|
return nil, fmt.Errorf("decode oidc discovery: %w", err)
|
||||||
|
}
|
||||||
|
if doc.JWKSURI == "" {
|
||||||
|
return nil, fmt.Errorf("oidc discovery: empty jwks_uri")
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx := context.Background()
|
||||||
|
cache := jwk.NewCache(ctx)
|
||||||
|
if err := cache.Register(doc.JWKSURI, jwk.WithMinRefreshInterval(time.Hour)); err != nil {
|
||||||
|
return nil, fmt.Errorf("register jwks cache: %w", err)
|
||||||
|
}
|
||||||
|
if _, err := cache.Refresh(ctx, doc.JWKSURI); err != nil {
|
||||||
|
return nil, fmt.Errorf("initial jwks fetch: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return &Validator{
|
||||||
|
issuer: issuerURL,
|
||||||
|
audience: audience,
|
||||||
|
jwksURI: doc.JWKSURI,
|
||||||
|
cache: cache,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Validate parses and validates rawToken. Returns the subject claim on success.
|
||||||
|
func (v *Validator) Validate(ctx context.Context, rawToken string) (string, error) {
|
||||||
|
keySet, err := v.cache.Get(ctx, v.jwksURI)
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("get jwks: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
opts := []jwt.ParseOption{
|
||||||
|
jwt.WithKeySet(keySet),
|
||||||
|
jwt.WithValidate(true),
|
||||||
|
jwt.WithIssuer(v.issuer),
|
||||||
|
}
|
||||||
|
if v.audience != "" {
|
||||||
|
opts = append(opts, jwt.WithAudience(v.audience))
|
||||||
|
}
|
||||||
|
|
||||||
|
tok, err := jwt.ParseString(rawToken, opts...)
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("validate jwt: %w", err)
|
||||||
|
}
|
||||||
|
return tok.Subject(), nil
|
||||||
|
}
|
||||||
169
internal/auth/jwt_test.go
Normal file
169
internal/auth/jwt_test.go
Normal file
@@ -0,0 +1,169 @@
|
|||||||
|
package auth_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"crypto/rand"
|
||||||
|
"crypto/rsa"
|
||||||
|
"encoding/json"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/lestrrat-go/jwx/v2/jwa"
|
||||||
|
"github.com/lestrrat-go/jwx/v2/jwk"
|
||||||
|
"github.com/lestrrat-go/jwx/v2/jwt"
|
||||||
|
"github.com/mathiasbq/supervisor/internal/auth"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
type testKeys struct {
|
||||||
|
priv jwk.Key
|
||||||
|
pub jwk.Key
|
||||||
|
}
|
||||||
|
|
||||||
|
func generateRSAKeys(t *testing.T) testKeys {
|
||||||
|
t.Helper()
|
||||||
|
raw, err := rsa.GenerateKey(rand.Reader, 2048)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
priv, err := jwk.FromRaw(raw)
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.NoError(t, priv.Set(jwk.KeyIDKey, "test-kid"))
|
||||||
|
require.NoError(t, priv.Set(jwk.AlgorithmKey, jwa.RS256))
|
||||||
|
|
||||||
|
pub, err := jwk.PublicKeyOf(priv)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
return testKeys{priv: priv, pub: pub}
|
||||||
|
}
|
||||||
|
|
||||||
|
func mockOIDCServer(t *testing.T, keys testKeys) *httptest.Server {
|
||||||
|
t.Helper()
|
||||||
|
set := jwk.NewSet()
|
||||||
|
require.NoError(t, set.AddKey(keys.pub))
|
||||||
|
jwksBytes, err := json.Marshal(set)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
mux := http.NewServeMux()
|
||||||
|
var srv *httptest.Server
|
||||||
|
mux.HandleFunc("/.well-known/openid-configuration", func(w http.ResponseWriter, _ *http.Request) {
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
_ = json.NewEncoder(w).Encode(map[string]string{
|
||||||
|
"issuer": srv.URL,
|
||||||
|
"jwks_uri": srv.URL + "/jwks",
|
||||||
|
})
|
||||||
|
})
|
||||||
|
mux.HandleFunc("/jwks", func(w http.ResponseWriter, _ *http.Request) {
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
_, _ = w.Write(jwksBytes)
|
||||||
|
})
|
||||||
|
srv = httptest.NewServer(mux)
|
||||||
|
t.Cleanup(srv.Close)
|
||||||
|
return srv
|
||||||
|
}
|
||||||
|
|
||||||
|
func signToken(t *testing.T, keys testKeys, issuer, audience, subject string, exp time.Time) string {
|
||||||
|
t.Helper()
|
||||||
|
b := jwt.NewBuilder().
|
||||||
|
Issuer(issuer).
|
||||||
|
Subject(subject).
|
||||||
|
Expiration(exp)
|
||||||
|
if audience != "" {
|
||||||
|
b = b.Audience([]string{audience})
|
||||||
|
}
|
||||||
|
tok, err := b.Build()
|
||||||
|
require.NoError(t, err)
|
||||||
|
signed, err := jwt.Sign(tok, jwt.WithKey(jwa.RS256, keys.priv))
|
||||||
|
require.NoError(t, err)
|
||||||
|
return string(signed)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestValidator(t *testing.T) {
|
||||||
|
keys := generateRSAKeys(t)
|
||||||
|
srv := mockOIDCServer(t, keys)
|
||||||
|
ctx := context.Background()
|
||||||
|
|
||||||
|
v, err := auth.NewValidator(srv.URL, "brain")
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
token string
|
||||||
|
wantSub string
|
||||||
|
wantErr bool
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "valid jwt",
|
||||||
|
token: signToken(t, keys, srv.URL, "brain", "test-user", time.Now().Add(time.Hour)),
|
||||||
|
wantSub: "test-user",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "expired jwt",
|
||||||
|
token: signToken(t, keys, srv.URL, "brain", "test-user", time.Now().Add(-time.Hour)),
|
||||||
|
wantErr: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "wrong issuer",
|
||||||
|
token: signToken(t, keys, "https://evil.example.com", "brain", "test-user", time.Now().Add(time.Hour)),
|
||||||
|
wantErr: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "wrong audience",
|
||||||
|
token: signToken(t, keys, srv.URL, "other-service", "test-user", time.Now().Add(time.Hour)),
|
||||||
|
wantErr: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "tampered token",
|
||||||
|
token: signToken(t, keys, srv.URL, "brain", "test-user", time.Now().Add(time.Hour)) + "tampered",
|
||||||
|
wantErr: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "not a jwt",
|
||||||
|
token: "not-a-jwt",
|
||||||
|
wantErr: true,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tc := range tests {
|
||||||
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
|
sub, err := v.Validate(ctx, tc.token)
|
||||||
|
if tc.wantErr {
|
||||||
|
assert.Error(t, err)
|
||||||
|
assert.Empty(t, sub)
|
||||||
|
} else {
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, tc.wantSub, sub)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNewValidator_NoAudience(t *testing.T) {
|
||||||
|
keys := generateRSAKeys(t)
|
||||||
|
srv := mockOIDCServer(t, keys)
|
||||||
|
ctx := context.Background()
|
||||||
|
|
||||||
|
v, err := auth.NewValidator(srv.URL, "")
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
// Token without audience passes when audience validation is disabled.
|
||||||
|
tok, err := jwt.NewBuilder().
|
||||||
|
Issuer(srv.URL).
|
||||||
|
Subject("sub").
|
||||||
|
Expiration(time.Now().Add(time.Hour)).
|
||||||
|
Build()
|
||||||
|
require.NoError(t, err)
|
||||||
|
signed, err := jwt.Sign(tok, jwt.WithKey(jwa.RS256, keys.priv))
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
sub, err := v.Validate(ctx, string(signed))
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, "sub", sub)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNewValidator_BadDiscoveryURL(t *testing.T) {
|
||||||
|
_, err := auth.NewValidator("http://127.0.0.1:1", "brain")
|
||||||
|
assert.Error(t, err)
|
||||||
|
}
|
||||||
23
internal/auth/protected_resource.go
Normal file
23
internal/auth/protected_resource.go
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
package auth
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"net/http"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ProtectedResourceHandler returns an RFC 9728 oauth-protected-resource metadata
|
||||||
|
// handler. Mount at GET /.well-known/oauth-protected-resource (no auth required).
|
||||||
|
func ProtectedResourceHandler(resourceURL, issuerURL string) http.HandlerFunc {
|
||||||
|
type metadata struct {
|
||||||
|
Resource string `json:"resource"`
|
||||||
|
AuthorizationServers []string `json:"authorization_servers"`
|
||||||
|
}
|
||||||
|
body, _ := json.Marshal(metadata{
|
||||||
|
Resource: resourceURL,
|
||||||
|
AuthorizationServers: []string{issuerURL},
|
||||||
|
})
|
||||||
|
return func(w http.ResponseWriter, _ *http.Request) {
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
_, _ = w.Write(body)
|
||||||
|
}
|
||||||
|
}
|
||||||
28
internal/auth/protected_resource_test.go
Normal file
28
internal/auth/protected_resource_test.go
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
package auth_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/mathiasbq/supervisor/internal/auth"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestProtectedResourceHandler(t *testing.T) {
|
||||||
|
h := auth.ProtectedResourceHandler("https://brain-mcp.d-ma.be", "https://auth.d-ma.be")
|
||||||
|
req := httptest.NewRequest(http.MethodGet, "/.well-known/oauth-protected-resource", nil)
|
||||||
|
rr := httptest.NewRecorder()
|
||||||
|
h(rr, req)
|
||||||
|
|
||||||
|
assert.Equal(t, http.StatusOK, rr.Code)
|
||||||
|
assert.Equal(t, "application/json", rr.Header().Get("Content-Type"))
|
||||||
|
|
||||||
|
var body map[string]any
|
||||||
|
require.NoError(t, json.Unmarshal(rr.Body.Bytes(), &body))
|
||||||
|
assert.Equal(t, "https://brain-mcp.d-ma.be", body["resource"])
|
||||||
|
servers := body["authorization_servers"].([]any)
|
||||||
|
assert.Equal(t, "https://auth.d-ma.be", servers[0])
|
||||||
|
}
|
||||||
@@ -25,6 +25,16 @@ type RoutingConfig struct {
|
|||||||
RouteLocalFloor float64 // HYPERGUILD_ROUTE_LOCAL_FLOOR, default 0.90
|
RouteLocalFloor float64 // HYPERGUILD_ROUTE_LOCAL_FLOOR, default 0.90
|
||||||
RouteLocalCeil float64 // HYPERGUILD_ROUTE_LOCAL_CEIL, default 0.70
|
RouteLocalCeil float64 // HYPERGUILD_ROUTE_LOCAL_CEIL, default 0.70
|
||||||
PassRateTTLSeconds int // HYPERGUILD_PASS_RATE_TTL_SECONDS, default 60
|
PassRateTTLSeconds int // HYPERGUILD_PASS_RATE_TTL_SECONDS, default 60
|
||||||
|
|
||||||
|
// project_create configuration. Empty GiteaMCPURL disables the
|
||||||
|
// project_create tool registration so the routing pod still starts
|
||||||
|
// in environments where it's not wired up.
|
||||||
|
GiteaMCPURL string // GITEA_MCP_URL, e.g. http://koala:30340/mcp
|
||||||
|
GiteaMCPToken string // GITEA_MCP_TOKEN, bearer for gitea-mcp
|
||||||
|
GiteaOwner string // GITEA_OWNER, default mathias
|
||||||
|
GitHubOwner string // GITHUB_OWNER, default mathiasb
|
||||||
|
InfraRepo string // INFRA_REPO, default infra
|
||||||
|
GitHubPAT string // GITHUB_PAT, repo scope; never logged
|
||||||
}
|
}
|
||||||
|
|
||||||
func LoadRouting() (RoutingConfig, error) {
|
func LoadRouting() (RoutingConfig, error) {
|
||||||
@@ -56,6 +66,13 @@ func LoadRouting() (RoutingConfig, error) {
|
|||||||
}
|
}
|
||||||
cfg.PassRateTTLSeconds = ttl
|
cfg.PassRateTTLSeconds = ttl
|
||||||
|
|
||||||
|
cfg.GiteaMCPURL = os.Getenv("GITEA_MCP_URL")
|
||||||
|
cfg.GiteaMCPToken = os.Getenv("GITEA_MCP_TOKEN")
|
||||||
|
cfg.GiteaOwner = envOr("GITEA_OWNER", "mathias")
|
||||||
|
cfg.GitHubOwner = envOr("GITHUB_OWNER", "mathiasb")
|
||||||
|
cfg.InfraRepo = envOr("INFRA_REPO", "infra")
|
||||||
|
cfg.GitHubPAT = os.Getenv("GITHUB_PAT")
|
||||||
|
|
||||||
return cfg, nil
|
return cfg, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
108
internal/githubclient/client.go
Normal file
108
internal/githubclient/client.go
Normal file
@@ -0,0 +1,108 @@
|
|||||||
|
// Package githubclient is a minimal GitHub REST API client. The hyperguild
|
||||||
|
// project_create flow is gitea-first; this client exists only to create an
|
||||||
|
// empty repo on GitHub before the gitea→github push-mirror is configured,
|
||||||
|
// since the mirror cannot push to a non-existent remote.
|
||||||
|
package githubclient
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// defaultAPI is the API base URL that New configures on every Client.
const defaultAPI = "https://api.github.com"
|
||||||
|
|
||||||
|
// Client is a minimal GitHub REST client authenticated with a personal
// access token.
type Client struct {
	api   string       // API base URL, without the request path
	token string       // personal access token sent in the Authorization header
	http  *http.Client // HTTP client used for every request
}
|
||||||
|
|
||||||
|
// New returns a Client with the given personal access token (repo scope).
|
||||||
|
func New(token string) *Client {
|
||||||
|
return &Client{
|
||||||
|
api: defaultAPI,
|
||||||
|
token: token,
|
||||||
|
http: &http.Client{Timeout: 30 * time.Second},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// WithBaseURL overrides the API base (test injection).
|
||||||
|
func (c *Client) WithBaseURL(u string) *Client {
|
||||||
|
c.api = u
|
||||||
|
return c
|
||||||
|
}
|
||||||
|
|
||||||
|
// Repo is the subset of GitHub's repo response we surface upstream.
type Repo struct {
	FullName string `json:"full_name"` // decoded from "full_name"
	HTMLURL  string `json:"html_url"`  // decoded from "html_url"
	CloneURL string `json:"clone_url"` // decoded from "clone_url"
	Private  bool   `json:"private"`   // decoded from "private"
}
|
||||||
|
|
||||||
|
// createRepoArgs is the JSON request body CreateRepo posts to /user/repos.
type createRepoArgs struct {
	Name        string `json:"name"`
	Description string `json:"description,omitempty"` // left out of the JSON when empty
	Private     bool   `json:"private"`
	AutoInit    bool   `json:"auto_init"` // always sent, even when false
}
|
||||||
|
|
||||||
|
// ErrAlreadyExists is the sentinel CreateRepo returns when GitHub answers 422
// and the response mentions "already exists". Callers treat it as idempotent
// success and can detect it with errors.Is.
var ErrAlreadyExists = fmt.Errorf("github repo already exists")
|
||||||
|
|
||||||
|
// CreateRepo creates a repo under the authenticated user's account.
|
||||||
|
// auto_init is always false — the push-mirror will populate the repo from
|
||||||
|
// gitea, so an auto-generated README would conflict on first push.
|
||||||
|
func (c *Client) CreateRepo(ctx context.Context, name, description string, private bool) (*Repo, error) {
|
||||||
|
if c.token == "" {
|
||||||
|
return nil, fmt.Errorf("github pat not configured")
|
||||||
|
}
|
||||||
|
body, _ := json.Marshal(createRepoArgs{
|
||||||
|
Name: name,
|
||||||
|
Description: description,
|
||||||
|
Private: private,
|
||||||
|
AutoInit: false,
|
||||||
|
})
|
||||||
|
req, err := http.NewRequestWithContext(ctx, http.MethodPost, c.api+"/user/repos", bytes.NewReader(body))
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("new request: %w", err)
|
||||||
|
}
|
||||||
|
req.Header.Set("Authorization", "token "+c.token)
|
||||||
|
req.Header.Set("Accept", "application/vnd.github+json")
|
||||||
|
req.Header.Set("Content-Type", "application/json")
|
||||||
|
req.Header.Set("X-GitHub-Api-Version", "2022-11-28")
|
||||||
|
|
||||||
|
resp, err := c.http.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("http: %w", err)
|
||||||
|
}
|
||||||
|
defer func() { _ = resp.Body.Close() }()
|
||||||
|
|
||||||
|
raw, _ := io.ReadAll(resp.Body)
|
||||||
|
switch resp.StatusCode {
|
||||||
|
case http.StatusCreated:
|
||||||
|
var r Repo
|
||||||
|
if err := json.Unmarshal(raw, &r); err != nil {
|
||||||
|
return nil, fmt.Errorf("decode response: %w", err)
|
||||||
|
}
|
||||||
|
return &r, nil
|
||||||
|
case http.StatusUnprocessableEntity:
|
||||||
|
// 422 covers "name already exists" + a handful of other validation
|
||||||
|
// errors. Treat any 422 that mentions "already exists" as idempotent
|
||||||
|
// success; everything else surfaces verbatim.
|
||||||
|
if bytes.Contains(raw, []byte("already exists")) {
|
||||||
|
return nil, ErrAlreadyExists
|
||||||
|
}
|
||||||
|
return nil, fmt.Errorf("github 422: %s", string(raw))
|
||||||
|
case http.StatusUnauthorized, http.StatusForbidden:
|
||||||
|
return nil, fmt.Errorf("github auth %d: PAT missing repo scope or invalid", resp.StatusCode)
|
||||||
|
default:
|
||||||
|
return nil, fmt.Errorf("github %d: %s", resp.StatusCode, string(raw))
|
||||||
|
}
|
||||||
|
}
|
||||||
71
internal/githubclient/client_test.go
Normal file
71
internal/githubclient/client_test.go
Normal file
@@ -0,0 +1,71 @@
|
|||||||
|
package githubclient_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/mathiasbq/supervisor/internal/githubclient"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestCreateRepo_Success(t *testing.T) {
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
assert.Equal(t, http.MethodPost, r.Method)
|
||||||
|
assert.Equal(t, "/user/repos", r.URL.Path)
|
||||||
|
assert.Equal(t, "token ghp_test", r.Header.Get("Authorization"))
|
||||||
|
var args map[string]any
|
||||||
|
b, _ := io.ReadAll(r.Body)
|
||||||
|
_ = json.Unmarshal(b, &args)
|
||||||
|
assert.Equal(t, "test-repo", args["name"])
|
||||||
|
assert.Equal(t, true, args["private"])
|
||||||
|
assert.Equal(t, false, args["auto_init"])
|
||||||
|
w.WriteHeader(http.StatusCreated)
|
||||||
|
_, _ = w.Write([]byte(`{"full_name":"mathiasb/test-repo","html_url":"https://github.com/mathiasb/test-repo","clone_url":"https://github.com/mathiasb/test-repo.git","private":true}`))
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
c := githubclient.New("ghp_test").WithBaseURL(srv.URL)
|
||||||
|
r, err := c.CreateRepo(context.Background(), "test-repo", "desc", true)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, "mathiasb/test-repo", r.FullName)
|
||||||
|
assert.True(t, r.Private)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCreateRepo_AlreadyExists(t *testing.T) {
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
|
||||||
|
w.WriteHeader(http.StatusUnprocessableEntity)
|
||||||
|
_, _ = w.Write([]byte(`{"message":"Validation Failed","errors":[{"resource":"Repository","code":"custom","field":"name","message":"name already exists on this account"}]}`))
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
c := githubclient.New("ghp_test").WithBaseURL(srv.URL)
|
||||||
|
_, err := c.CreateRepo(context.Background(), "x", "", false)
|
||||||
|
require.Error(t, err)
|
||||||
|
assert.True(t, errors.Is(err, githubclient.ErrAlreadyExists))
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCreateRepo_Unauthorized(t *testing.T) {
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
|
||||||
|
w.WriteHeader(http.StatusUnauthorized)
|
||||||
|
_, _ = w.Write([]byte(`{"message":"Bad credentials"}`))
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
c := githubclient.New("ghp_test").WithBaseURL(srv.URL)
|
||||||
|
_, err := c.CreateRepo(context.Background(), "x", "", false)
|
||||||
|
require.Error(t, err)
|
||||||
|
assert.Contains(t, err.Error(), "PAT missing repo scope")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCreateRepo_NoToken(t *testing.T) {
|
||||||
|
c := githubclient.New("")
|
||||||
|
_, err := c.CreateRepo(context.Background(), "x", "", false)
|
||||||
|
require.Error(t, err)
|
||||||
|
assert.Contains(t, err.Error(), "github pat not configured")
|
||||||
|
}
|
||||||
@@ -8,6 +8,7 @@ import (
|
|||||||
"net/http"
|
"net/http"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
"github.com/mathiasbq/supervisor/internal/auth"
|
||||||
"github.com/mathiasbq/supervisor/internal/registry"
|
"github.com/mathiasbq/supervisor/internal/registry"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -34,13 +35,14 @@ type rpcError struct {
|
|||||||
type Server struct {
|
type Server struct {
|
||||||
reg *registry.Registry
|
reg *registry.Registry
|
||||||
token string
|
token string
|
||||||
|
validator *auth.Validator
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewServer constructs an MCP HTTP handler. If token is non-empty, every
|
// NewServer constructs an MCP HTTP handler. token is the static bearer token
|
||||||
// request must carry "Authorization: Bearer <token>" or it is rejected with
|
// (empty disables static auth). validator is optional; when non-nil, a valid
|
||||||
// HTTP 401 and JSON-RPC error -32001. Empty token disables auth (default).
|
// JWT from Dex is accepted in addition to the static token.
|
||||||
func NewServer(reg *registry.Registry, token string) *Server {
|
func NewServer(reg *registry.Registry, token string, validator *auth.Validator) *Server {
|
||||||
return &Server{reg: reg, token: token}
|
return &Server{reg: reg, token: token, validator: validator}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *Server) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
func (s *Server) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
||||||
@@ -120,17 +122,35 @@ func (s *Server) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
// checkAuth verifies the bearer token when one is configured. Returns true if
|
// checkAuth verifies the bearer token. Accepts a valid Dex JWT (when validator
|
||||||
// the request may proceed, false if it has been rejected (401 already written).
|
// is configured) or the static token. Returns true if the request may proceed.
|
||||||
|
// When neither token nor validator is configured, auth is disabled (default).
|
||||||
func (s *Server) checkAuth(w http.ResponseWriter, r *http.Request) bool {
|
func (s *Server) checkAuth(w http.ResponseWriter, r *http.Request) bool {
|
||||||
if s.token == "" {
|
if s.token == "" && s.validator == nil {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
const prefix = "Bearer "
|
rawToken, ok := strings.CutPrefix(r.Header.Get("Authorization"), "Bearer ")
|
||||||
hdr := r.Header.Get("Authorization")
|
if !ok {
|
||||||
if !strings.HasPrefix(hdr, prefix) ||
|
s.rejectAuth(w, r)
|
||||||
subtle.ConstantTimeCompare([]byte(hdr[len(prefix):]), []byte(s.token)) != 1 {
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
if s.validator != nil {
|
||||||
|
if _, err := s.validator.Validate(r.Context(), rawToken); err == nil {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if s.token != "" && subtle.ConstantTimeCompare([]byte(rawToken), []byte(s.token)) == 1 {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
s.rejectAuth(w, r)
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Server) rejectAuth(w http.ResponseWriter, r *http.Request) {
|
||||||
slog.Warn("mcp auth rejected", "remote", r.RemoteAddr, "method", r.Method)
|
slog.Warn("mcp auth rejected", "remote", r.RemoteAddr, "method", r.Method)
|
||||||
w.Header().Set("Content-Type", "application/json")
|
w.Header().Set("Content-Type", "application/json")
|
||||||
w.WriteHeader(http.StatusUnauthorized)
|
w.WriteHeader(http.StatusUnauthorized)
|
||||||
@@ -138,9 +158,6 @@ func (s *Server) checkAuth(w http.ResponseWriter, r *http.Request) bool {
|
|||||||
JSONRPC: "2.0",
|
JSONRPC: "2.0",
|
||||||
Error: &rpcError{Code: -32001, Message: "unauthorized"},
|
Error: &rpcError{Code: -32001, Message: "unauthorized"},
|
||||||
})
|
})
|
||||||
return false
|
|
||||||
}
|
|
||||||
return true
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func writeError(w http.ResponseWriter, id any, code int, msg string) {
|
func writeError(w http.ResponseWriter, id any, code int, msg string) {
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ func jsonBody(t *testing.T, v any) *bytes.Buffer {
|
|||||||
|
|
||||||
func TestMCPInitialize(t *testing.T) {
|
func TestMCPInitialize(t *testing.T) {
|
||||||
reg := registry.New()
|
reg := registry.New()
|
||||||
srv := mcp.NewServer(reg, "")
|
srv := mcp.NewServer(reg, "", nil)
|
||||||
|
|
||||||
req := httptest.NewRequest(http.MethodPost, "/mcp", jsonBody(t, map[string]any{
|
req := httptest.NewRequest(http.MethodPost, "/mcp", jsonBody(t, map[string]any{
|
||||||
"jsonrpc": "2.0",
|
"jsonrpc": "2.0",
|
||||||
@@ -45,7 +45,7 @@ func TestMCPInitialize(t *testing.T) {
|
|||||||
|
|
||||||
func TestMCPToolsList(t *testing.T) {
|
func TestMCPToolsList(t *testing.T) {
|
||||||
reg := registry.New()
|
reg := registry.New()
|
||||||
srv := mcp.NewServer(reg, "")
|
srv := mcp.NewServer(reg, "", nil)
|
||||||
|
|
||||||
req := httptest.NewRequest(http.MethodPost, "/mcp", jsonBody(t, map[string]any{
|
req := httptest.NewRequest(http.MethodPost, "/mcp", jsonBody(t, map[string]any{
|
||||||
"jsonrpc": "2.0", "id": 2, "method": "tools/list", "params": map[string]any{},
|
"jsonrpc": "2.0", "id": 2, "method": "tools/list", "params": map[string]any{},
|
||||||
@@ -63,7 +63,7 @@ func TestMCPToolsList(t *testing.T) {
|
|||||||
|
|
||||||
func TestMCPUnknownMethod(t *testing.T) {
|
func TestMCPUnknownMethod(t *testing.T) {
|
||||||
reg := registry.New()
|
reg := registry.New()
|
||||||
srv := mcp.NewServer(reg, "")
|
srv := mcp.NewServer(reg, "", nil)
|
||||||
|
|
||||||
req := httptest.NewRequest(http.MethodPost, "/mcp", jsonBody(t, map[string]any{
|
req := httptest.NewRequest(http.MethodPost, "/mcp", jsonBody(t, map[string]any{
|
||||||
"jsonrpc": "2.0", "id": 3, "method": "unknown/method", "params": map[string]any{},
|
"jsonrpc": "2.0", "id": 3, "method": "unknown/method", "params": map[string]any{},
|
||||||
@@ -80,7 +80,7 @@ func TestMCPUnknownMethod(t *testing.T) {
|
|||||||
|
|
||||||
func TestMCPNotificationKnownMethodGetsNoResponseBody(t *testing.T) {
|
func TestMCPNotificationKnownMethodGetsNoResponseBody(t *testing.T) {
|
||||||
reg := registry.New()
|
reg := registry.New()
|
||||||
srv := mcp.NewServer(reg, "")
|
srv := mcp.NewServer(reg, "", nil)
|
||||||
|
|
||||||
// JSON-RPC 2.0 notification: "id" field absent. Per spec, server MUST NOT
|
// JSON-RPC 2.0 notification: "id" field absent. Per spec, server MUST NOT
|
||||||
// reply. notifications/initialized is part of the standard MCP handshake.
|
// reply. notifications/initialized is part of the standard MCP handshake.
|
||||||
@@ -116,7 +116,7 @@ func TestMCPAuth(t *testing.T) {
|
|||||||
for _, tc := range cases {
|
for _, tc := range cases {
|
||||||
t.Run(tc.name, func(t *testing.T) {
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
reg := registry.New()
|
reg := registry.New()
|
||||||
srv := mcp.NewServer(reg, tc.token)
|
srv := mcp.NewServer(reg, tc.token, nil)
|
||||||
|
|
||||||
req := httptest.NewRequest(http.MethodPost, "/mcp", jsonBody(t, map[string]any{
|
req := httptest.NewRequest(http.MethodPost, "/mcp", jsonBody(t, map[string]any{
|
||||||
"jsonrpc": "2.0", "id": 1, "method": "initialize", "params": map[string]any{},
|
"jsonrpc": "2.0", "id": 1, "method": "initialize", "params": map[string]any{},
|
||||||
@@ -142,7 +142,7 @@ func TestMCPAuth(t *testing.T) {
|
|||||||
|
|
||||||
func TestMCPNotificationUnknownMethodGetsNoResponseBody(t *testing.T) {
|
func TestMCPNotificationUnknownMethodGetsNoResponseBody(t *testing.T) {
|
||||||
reg := registry.New()
|
reg := registry.New()
|
||||||
srv := mcp.NewServer(reg, "")
|
srv := mcp.NewServer(reg, "", nil)
|
||||||
|
|
||||||
req := httptest.NewRequest(http.MethodPost, "/mcp", jsonBody(t, map[string]any{
|
req := httptest.NewRequest(http.MethodPost, "/mcp", jsonBody(t, map[string]any{
|
||||||
"jsonrpc": "2.0",
|
"jsonrpc": "2.0",
|
||||||
|
|||||||
150
internal/mcpclient/client.go
Normal file
150
internal/mcpclient/client.go
Normal file
@@ -0,0 +1,150 @@
|
|||||||
|
// Package mcpclient is a minimal JSON-RPC over HTTP client for talking to
|
||||||
|
// MCP servers from inside hyperguild components. It only implements
|
||||||
|
// `tools/call` because that's all consumer skills need today.
|
||||||
|
package mcpclient
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Client calls an MCP server over Streamable HTTP / JSON-RPC.
|
||||||
|
type Client struct {
|
||||||
|
url string
|
||||||
|
token string
|
||||||
|
http *http.Client
|
||||||
|
}
|
||||||
|
|
||||||
|
// ErrTokenRequired is returned by New when token is empty. Empty token
|
||||||
|
// causes mcpclient to omit the Authorization header at request time,
|
||||||
|
// which is silently misread as 401 by bearer-auth servers — see
|
||||||
|
// hyperguild #13 and the brain entry on the failure mode.
|
||||||
|
var ErrTokenRequired = errors.New("mcpclient: token required")
|
||||||
|
|
||||||
|
// New returns a Client. Returns ErrTokenRequired when token is empty:
|
||||||
|
// every MCP server we talk to today is bearer-protected, and an empty
|
||||||
|
// token is always a configuration bug (typically a Kubernetes Secret
|
||||||
|
// missing the expected key, see hyperguild #13). Callers that genuinely
|
||||||
|
// need an unauthenticated client should construct &Client{} directly in
|
||||||
|
// tests, not call New.
|
||||||
|
func New(url, token string) (*Client, error) {
|
||||||
|
if token == "" {
|
||||||
|
return nil, ErrTokenRequired
|
||||||
|
}
|
||||||
|
return &Client{
|
||||||
|
url: url,
|
||||||
|
token: token,
|
||||||
|
http: &http.Client{Timeout: 60 * time.Second},
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// WithHTTPClient overrides the underlying HTTP client (test injection).
|
||||||
|
func (c *Client) WithHTTPClient(h *http.Client) *Client {
|
||||||
|
c.http = h
|
||||||
|
return c
|
||||||
|
}
|
||||||
|
|
||||||
|
type rpcRequest struct {
|
||||||
|
JSONRPC string `json:"jsonrpc"`
|
||||||
|
ID int `json:"id"`
|
||||||
|
Method string `json:"method"`
|
||||||
|
Params map[string]any `json:"params"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type rpcError struct {
|
||||||
|
Code int `json:"code"`
|
||||||
|
Message string `json:"message"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type rpcResponse struct {
|
||||||
|
JSONRPC string `json:"jsonrpc"`
|
||||||
|
ID int `json:"id"`
|
||||||
|
Result json.RawMessage `json:"result,omitempty"`
|
||||||
|
Error *rpcError `json:"error,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// Error is the typed failure reported by the remote MCP server through the
// JSON-RPC error object. Code follows JSON-RPC conventions; see gitea-mcp
// internal/mcp/jsonrpc.go for the codes the server uses (e.g. -32002
// NotFound, -32003 Conflict).
type Error struct {
	Code    int    // error code copied from the JSON-RPC error object
	Message string // error message copied from the JSON-RPC error object
}

// Error implements the error interface, rendering "mcp error <code>: <message>".
func (e *Error) Error() string {
	const layout = "mcp error %d: %s"
	return fmt.Sprintf(layout, e.Code, e.Message)
}
|
||||||
|
|
||||||
|
// CallTool issues `tools/call`. result is JSON-unmarshalled from the
|
||||||
|
// server's content[0].text field; pass nil to discard.
|
||||||
|
func (c *Client) CallTool(ctx context.Context, name string, args any, result any) error {
|
||||||
|
body, err := json.Marshal(rpcRequest{
|
||||||
|
JSONRPC: "2.0",
|
||||||
|
ID: 1,
|
||||||
|
Method: "tools/call",
|
||||||
|
Params: map[string]any{
|
||||||
|
"name": name,
|
||||||
|
"arguments": args,
|
||||||
|
},
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("marshal request: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
req, err := http.NewRequestWithContext(ctx, http.MethodPost, c.url, bytes.NewReader(body))
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("new request: %w", err)
|
||||||
|
}
|
||||||
|
req.Header.Set("Content-Type", "application/json")
|
||||||
|
if c.token != "" {
|
||||||
|
req.Header.Set("Authorization", "Bearer "+c.token)
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, err := c.http.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("http: %w", err)
|
||||||
|
}
|
||||||
|
defer func() { _ = resp.Body.Close() }()
|
||||||
|
|
||||||
|
raw, err := io.ReadAll(resp.Body)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("read body: %w", err)
|
||||||
|
}
|
||||||
|
if resp.StatusCode >= 400 {
|
||||||
|
return fmt.Errorf("mcp http %d: %s", resp.StatusCode, string(raw))
|
||||||
|
}
|
||||||
|
|
||||||
|
var rpc rpcResponse
|
||||||
|
if err := json.Unmarshal(raw, &rpc); err != nil {
|
||||||
|
return fmt.Errorf("decode response: %w (body=%s)", err, string(raw))
|
||||||
|
}
|
||||||
|
if rpc.Error != nil {
|
||||||
|
return &Error{Code: rpc.Error.Code, Message: rpc.Error.Message}
|
||||||
|
}
|
||||||
|
|
||||||
|
if result == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// MCP success result shape: { content: [{type:"text", text:"<json>"}] }
|
||||||
|
var wrap struct {
|
||||||
|
Content []struct {
|
||||||
|
Type string `json:"type"`
|
||||||
|
Text string `json:"text"`
|
||||||
|
} `json:"content"`
|
||||||
|
}
|
||||||
|
if err := json.Unmarshal(rpc.Result, &wrap); err != nil {
|
||||||
|
return fmt.Errorf("decode wrap: %w (result=%s)", err, string(rpc.Result))
|
||||||
|
}
|
||||||
|
if len(wrap.Content) == 0 {
|
||||||
|
return fmt.Errorf("empty content in tool response")
|
||||||
|
}
|
||||||
|
if err := json.Unmarshal([]byte(wrap.Content[0].Text), result); err != nil {
|
||||||
|
return fmt.Errorf("decode tool result text: %w (text=%s)", err, wrap.Content[0].Text)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
92
internal/mcpclient/client_test.go
Normal file
92
internal/mcpclient/client_test.go
Normal file
@@ -0,0 +1,92 @@
|
|||||||
|
package mcpclient_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/mathiasbq/supervisor/internal/mcpclient"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestNew_EmptyTokenFailsFast(t *testing.T) {
|
||||||
|
c, err := mcpclient.New("http://example.invalid", "")
|
||||||
|
require.Error(t, err)
|
||||||
|
require.Nil(t, c)
|
||||||
|
require.ErrorIs(t, err, mcpclient.ErrTokenRequired)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCallTool_Success(t *testing.T) {
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
assert.Equal(t, http.MethodPost, r.Method)
|
||||||
|
assert.Equal(t, "Bearer tok", r.Header.Get("Authorization"))
|
||||||
|
b, _ := io.ReadAll(r.Body)
|
||||||
|
var got map[string]any
|
||||||
|
_ = json.Unmarshal(b, &got)
|
||||||
|
assert.Equal(t, "tools/call", got["method"])
|
||||||
|
params := got["params"].(map[string]any)
|
||||||
|
assert.Equal(t, "x_y", params["name"])
|
||||||
|
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
_, _ = w.Write([]byte(`{"jsonrpc":"2.0","id":1,"result":{"content":[{"type":"text","text":"{\"ok\":true,\"n\":7}"}]}}`))
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
c, err := mcpclient.New(srv.URL, "tok")
|
||||||
|
require.NoError(t, err)
|
||||||
|
var out struct {
|
||||||
|
OK bool `json:"ok"`
|
||||||
|
N int `json:"n"`
|
||||||
|
}
|
||||||
|
require.NoError(t, c.CallTool(context.Background(), "x_y", map[string]any{"a": 1}, &out))
|
||||||
|
assert.True(t, out.OK)
|
||||||
|
assert.Equal(t, 7, out.N)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCallTool_RPCError(t *testing.T) {
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
_, _ = w.Write([]byte(`{"jsonrpc":"2.0","id":1,"error":{"code":-32003,"message":"already exists"}}`))
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
c, err := mcpclient.New(srv.URL, "test")
|
||||||
|
require.NoError(t, err)
|
||||||
|
err = c.CallTool(context.Background(), "x", nil, nil)
|
||||||
|
require.Error(t, err)
|
||||||
|
var me *mcpclient.Error
|
||||||
|
require.True(t, errors.As(err, &me))
|
||||||
|
assert.Equal(t, -32003, me.Code)
|
||||||
|
assert.Contains(t, me.Message, "already exists")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCallTool_HTTPError(t *testing.T) {
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
|
||||||
|
w.WriteHeader(http.StatusUnauthorized)
|
||||||
|
_, _ = w.Write([]byte(`unauthorized`))
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
c, err := mcpclient.New(srv.URL, "test")
|
||||||
|
require.NoError(t, err)
|
||||||
|
err = c.CallTool(context.Background(), "x", nil, nil)
|
||||||
|
require.Error(t, err)
|
||||||
|
assert.Contains(t, err.Error(), "401")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCallTool_NilResult(t *testing.T) {
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
_, _ = w.Write([]byte(`{"jsonrpc":"2.0","id":1,"result":{"content":[{"type":"text","text":"{}"}]}}`))
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
c, err := mcpclient.New(srv.URL, "test")
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.NoError(t, c.CallTool(context.Background(), "x", nil, nil))
|
||||||
|
}
|
||||||
297
internal/skills/project/handlers.go
Normal file
297
internal/skills/project/handlers.go
Normal file
@@ -0,0 +1,297 @@
|
|||||||
|
package project
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/mathiasbq/supervisor/internal/githubclient"
|
||||||
|
"github.com/mathiasbq/supervisor/internal/mcpclient"
|
||||||
|
)
|
||||||
|
|
||||||
|
// createArgs is the JSON argument object accepted by project_create.
type createArgs struct {
	// Name is the repository name; it also names the staging namespace
	// ("staging-<name>") and the manifest path in the infra repo.
	Name string `json:"name"`
	// Description is the repository description and the issue title suffix.
	Description string `json:"description"`
	// Hypothesis is the opening section of the experiment-brief issue.
	Hypothesis string `json:"hypothesis"`
	// Folder is the local parent folder used in the next-steps hint;
	// empty means ".".
	Folder string `json:"folder"`
	// Stack selects the template; validation accepts "go-agent" and "go-web".
	Stack string `json:"stack"`
	// Private is forwarded to repository creation.
	Private bool `json:"private"`
	// MirrorToGitHub opts in to the GitHub repo + push-mirror steps.
	MirrorToGitHub bool `json:"mirror_to_github,omitempty"`
}
|
||||||
|
|
||||||
|
// createResult is the JSON payload returned by project_create, both on full
// success and (with FailedStep set) on partial failure.
type createResult struct {
	// GiteaURL is the URL of the repository on Gitea.
	GiteaURL string `json:"gitea_url"`
	// GitHubURL is the mirror URL; left empty unless mirroring was requested.
	GitHubURL string `json:"github_url"`
	// IssueURL is the URL of the experiment-brief issue once it is opened.
	IssueURL string `json:"issue_url"`
	// NextSteps is a shell command hint for setting up the local checkout.
	NextSteps string `json:"next_steps"`

	// Reached records the steps that completed. Populated on partial failure
	// so callers can resume manually instead of guessing what already ran.
	Reached []string `json:"reached,omitempty"`

	// FailedStep is non-empty when a downstream gitea-mcp call returned an
	// error; the error itself is surfaced via the JSON-RPC error response,
	// this field tells the operator which step it happened in.
	FailedStep string `json:"failed_step,omitempty"`
}
|
||||||
|
|
||||||
|
// errUnknownTool builds the error reported for a tool name this skill does
// not handle.
func errUnknownTool(name string) error {
	return fmt.Errorf("unknown tool: %s", name)
}
|
||||||
|
|
||||||
|
// Step names — these exact strings are what callers see in the
// failed_step / reached fields of the result, so they must stay in sync.
const (
	stepCreateRepo   = "create_repo"        // repository created from the template on Gitea
	stepCreateGitHub = "create_github_repo" // empty destination repository created on GitHub
	stepMirror       = "mirror"             // push mirror configured on the Gitea repository
	stepInfraCommit  = "infra_commit"       // staging namespace manifest committed to the infra repo
	stepIssue        = "issue"              // experiment-brief issue opened
)
|
||||||
|
|
||||||
|
func (s *Skill) handleCreate(ctx context.Context, raw json.RawMessage) (json.RawMessage, error) {
|
||||||
|
var args createArgs
|
||||||
|
if err := json.Unmarshal(raw, &args); err != nil {
|
||||||
|
return nil, fmt.Errorf("parse args: %w", err)
|
||||||
|
}
|
||||||
|
if err := validate(args); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
tmpl := templateFor(args.Stack)
|
||||||
|
giteaURL := fmt.Sprintf("http://gitea.d-ma.be/%s/%s", s.cfg.GiteaOwner, args.Name)
|
||||||
|
|
||||||
|
res := createResult{
|
||||||
|
GiteaURL: giteaURL,
|
||||||
|
}
|
||||||
|
if args.MirrorToGitHub {
|
||||||
|
res.GitHubURL = fmt.Sprintf("https://github.com/%s/%s", s.cfg.GitHubOwner, args.Name)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Step 1: create_project_from_template. If the repo already exists,
|
||||||
|
// gitea-mcp returns -32003 Conflict; we treat that as idempotent success
|
||||||
|
// and continue to the next steps so re-running self-heals partial runs.
|
||||||
|
existed, err := s.callCreateRepo(ctx, args, tmpl)
|
||||||
|
if err != nil {
|
||||||
|
return marshalPartial(res, stepCreateRepo, err)
|
||||||
|
}
|
||||||
|
res.Reached = append(res.Reached, stepCreateRepo)
|
||||||
|
|
||||||
|
// Steps 2+3 are skipped when MirrorToGitHub is false. Default per
|
||||||
|
// infra ADR (Gitea as true master, GitHub as optional opt-in): keep
|
||||||
|
// client / business-logic / personal repos Gitea-only. Set
|
||||||
|
// `mirror_to_github: true` for open-source projects that want a
|
||||||
|
// public GitHub mirror (hyperguild, gitea-mcp, template-*).
|
||||||
|
if args.MirrorToGitHub {
|
||||||
|
// Step 2: create empty GitHub repo. Gitea's push-mirror cannot push
|
||||||
|
// to a non-existent remote, so the destination must exist before
|
||||||
|
// step 3 configures the mirror. Skipped when GitHub client is unset
|
||||||
|
// (degraded mode — see Config.GitHub doc).
|
||||||
|
if s.cfg.GitHub != nil {
|
||||||
|
if err := s.callCreateGitHubRepo(ctx, args); err != nil && !errors.Is(err, githubclient.ErrAlreadyExists) {
|
||||||
|
return marshalPartial(res, stepCreateGitHub, err)
|
||||||
|
}
|
||||||
|
res.Reached = append(res.Reached, stepCreateGitHub)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Step 3: configure push mirror to GitHub. Idempotent: if a mirror with
|
||||||
|
// the same remote already exists, gitea-mcp returns Conflict; we swallow it.
|
||||||
|
if err := s.callMirror(ctx, args.Name); err != nil {
|
||||||
|
if !isConflict(err) {
|
||||||
|
return marshalPartial(res, stepMirror, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
res.Reached = append(res.Reached, stepMirror)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Step 3: commit staging namespace manifest to infra repo. Done before
|
||||||
|
// the issue so the staging env is reconciling by the time the issue lands.
|
||||||
|
if err := s.callInfraCommit(ctx, args.Name); err != nil {
|
||||||
|
if !isConflict(err) {
|
||||||
|
return marshalPartial(res, stepInfraCommit, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
res.Reached = append(res.Reached, stepInfraCommit)
|
||||||
|
|
||||||
|
// Step 4: open the experiment-brief issue on the new repo.
|
||||||
|
issueURL, err := s.callIssue(ctx, args, existed)
|
||||||
|
if err != nil {
|
||||||
|
return marshalPartial(res, stepIssue, err)
|
||||||
|
}
|
||||||
|
res.IssueURL = issueURL
|
||||||
|
res.Reached = append(res.Reached, stepIssue)
|
||||||
|
|
||||||
|
folder := args.Folder
|
||||||
|
if folder == "" {
|
||||||
|
folder = "."
|
||||||
|
}
|
||||||
|
res.NextSteps = fmt.Sprintf(
|
||||||
|
"cd ~/dev/%s/%s && task new-project -- %s personal %s %s && git remote add origin http://gitea.d-ma.be/%s/%s.git && git push -u origin main",
|
||||||
|
folder, args.Name, args.Name, folder, args.Stack, s.cfg.GiteaOwner, args.Name,
|
||||||
|
)
|
||||||
|
|
||||||
|
return marshalResult(res)
|
||||||
|
}
|
||||||
|
|
||||||
|
// callCreateRepo invokes create_project_from_template. Returns (existed, err)
|
||||||
|
// where existed=true means the destination was already present and we should
|
||||||
|
// treat it as a no-op success (idempotency).
|
||||||
|
func (s *Skill) callCreateRepo(ctx context.Context, args createArgs, template string) (bool, error) {
|
||||||
|
var out struct {
|
||||||
|
HTMLURL string `json:"html_url"`
|
||||||
|
}
|
||||||
|
err := s.cfg.Client.CallTool(ctx, "create_project_from_template", map[string]any{
|
||||||
|
"owner": s.cfg.GiteaOwner,
|
||||||
|
"name": args.Name,
|
||||||
|
"description": args.Description,
|
||||||
|
"private": args.Private,
|
||||||
|
"template_name": template,
|
||||||
|
}, &out)
|
||||||
|
if err == nil {
|
||||||
|
return false, nil
|
||||||
|
}
|
||||||
|
if isConflict(err) {
|
||||||
|
return true, nil
|
||||||
|
}
|
||||||
|
return false, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// callCreateGitHubRepo creates the empty destination repo on GitHub.
|
||||||
|
// auto_init=false in githubclient so first push from gitea doesn't conflict
|
||||||
|
// with an auto-generated README.
|
||||||
|
func (s *Skill) callCreateGitHubRepo(ctx context.Context, args createArgs) error {
|
||||||
|
_, err := s.cfg.GitHub.CreateRepo(ctx, args.Name, args.Description, args.Private)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// callMirror configures the push mirror to GitHub.
|
||||||
|
func (s *Skill) callMirror(ctx context.Context, name string) error {
|
||||||
|
remote := fmt.Sprintf("https://github.com/%s/%s.git", s.cfg.GitHubOwner, name)
|
||||||
|
return s.cfg.Client.CallTool(ctx, "repo_mirror_push", map[string]any{
|
||||||
|
"owner": s.cfg.GiteaOwner,
|
||||||
|
"name": name,
|
||||||
|
"action": "add",
|
||||||
|
"remote_address": remote,
|
||||||
|
"remote_username": s.cfg.GitHubOwner,
|
||||||
|
"remote_password": s.cfg.GitHubPAT,
|
||||||
|
"interval": "8h0m0s",
|
||||||
|
"sync_on_commit": true,
|
||||||
|
}, nil)
|
||||||
|
}
|
||||||
|
|
||||||
|
// callInfraCommit writes the staging namespace manifest directly to infra
|
||||||
|
// main. Flux reconciles within ~60s. See DECISIONS.md 2026-05-18.
|
||||||
|
func (s *Skill) callInfraCommit(ctx context.Context, name string) error {
|
||||||
|
manifest := stagingNamespaceManifest(name, time.Now().UTC().Format(time.RFC3339))
|
||||||
|
return s.cfg.Client.CallTool(ctx, "file_write_branch", map[string]any{
|
||||||
|
"owner": s.cfg.GiteaOwner,
|
||||||
|
"name": s.cfg.InfraRepo,
|
||||||
|
"path": fmt.Sprintf("k3s/staging/%s/namespace.yaml", name),
|
||||||
|
"content": manifest,
|
||||||
|
"branch": "main",
|
||||||
|
"message": fmt.Sprintf("feat(staging): add namespace for %s\n\nGenerated by hyperguild project_create.", name),
|
||||||
|
}, nil)
|
||||||
|
}
|
||||||
|
|
||||||
|
// callIssue opens the experiment-brief issue on the newly-created repo.
|
||||||
|
// existed=true (repo pre-existed) still posts a new brief — repeated runs
|
||||||
|
// can intentionally restate intent without colliding.
|
||||||
|
func (s *Skill) callIssue(ctx context.Context, args createArgs, existed bool) (string, error) {
|
||||||
|
body := experimentBrief(args, existed)
|
||||||
|
var out struct {
|
||||||
|
HTMLURL string `json:"html_url"`
|
||||||
|
}
|
||||||
|
err := s.cfg.Client.CallTool(ctx, "issue_create", map[string]any{
|
||||||
|
"owner": s.cfg.GiteaOwner,
|
||||||
|
"name": args.Name,
|
||||||
|
"title": "experiment brief: " + args.Description,
|
||||||
|
"body": body,
|
||||||
|
}, &out)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
return out.HTMLURL, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// stagingNamespaceManifest renders the Kubernetes Namespace manifest for a
// project's staging environment.
//
// name is used for the namespace ("staging-<name>") and the `project` label.
// createdAt is recorded as the `created-at` annotation. It is an annotation
// rather than a label because callers pass an RFC 3339 timestamp, and label
// values may not contain ':' — as a label the API server would reject the
// whole Namespace.
func stagingNamespaceManifest(name, createdAt string) string {
	return fmt.Sprintf(`apiVersion: v1
kind: Namespace
metadata:
  name: staging-%s
  labels:
    managed-by: hyperguild
    project: %s
  annotations:
    created-at: "%s"
`, name, name, createdAt)
}
|
||||||
|
|
||||||
|
func experimentBrief(args createArgs, existed bool) string {
|
||||||
|
var b strings.Builder
|
||||||
|
b.WriteString("## Hypothesis\n\n")
|
||||||
|
b.WriteString(args.Hypothesis)
|
||||||
|
b.WriteString("\n\n## Description\n\n")
|
||||||
|
b.WriteString(args.Description)
|
||||||
|
b.WriteString("\n\n## Stack\n\n`")
|
||||||
|
b.WriteString(args.Stack)
|
||||||
|
b.WriteString("`\n\n## Provisioning\n\n")
|
||||||
|
b.WriteString("- Repo created from `template-")
|
||||||
|
b.WriteString(args.Stack)
|
||||||
|
b.WriteString("` on Gitea.\n")
|
||||||
|
if args.MirrorToGitHub {
|
||||||
|
b.WriteString("- Push-mirror configured to GitHub.\n")
|
||||||
|
} else {
|
||||||
|
b.WriteString("- Gitea-only (no GitHub mirror — set `mirror_to_github: true` to opt in).\n")
|
||||||
|
}
|
||||||
|
b.WriteString("- Staging namespace manifest committed to infra repo.\n\n")
|
||||||
|
if existed {
|
||||||
|
b.WriteString("> Note: this repo already existed when `project_create` ran — provisioning steps were re-applied idempotently.\n")
|
||||||
|
}
|
||||||
|
return b.String()
|
||||||
|
}
|
||||||
|
|
||||||
|
func validate(args createArgs) error {
|
||||||
|
if args.Name == "" {
|
||||||
|
return errors.New("name is required")
|
||||||
|
}
|
||||||
|
if args.Description == "" {
|
||||||
|
return errors.New("description is required")
|
||||||
|
}
|
||||||
|
if args.Hypothesis == "" {
|
||||||
|
return errors.New("hypothesis is required")
|
||||||
|
}
|
||||||
|
if args.Stack != "go-agent" && args.Stack != "go-web" {
|
||||||
|
return fmt.Errorf("stack must be go-agent or go-web, got %q", args.Stack)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// templateFor maps a stack name to the template repository it is created
// from: "go-agent" selects template-go-agent, anything else falls back to
// template-go-web.
func templateFor(stack string) string {
	if stack == "go-agent" {
		return "template-go-agent"
	}
	return "template-go-web"
}
|
||||||
|
|
||||||
|
func isConflict(err error) bool {
|
||||||
|
var me *mcpclient.Error
|
||||||
|
if errors.As(err, &me) && me.Code == -32003 {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
func marshalResult(r createResult) (json.RawMessage, error) {
|
||||||
|
b, err := json.Marshal(r)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("marshal result: %w", err)
|
||||||
|
}
|
||||||
|
return b, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func marshalPartial(r createResult, step string, inner error) (json.RawMessage, error) {
|
||||||
|
r.FailedStep = step
|
||||||
|
b, _ := json.Marshal(r)
|
||||||
|
return b, fmt.Errorf("project_create step %q failed: %w", step, inner)
|
||||||
|
}
|
||||||
419
internal/skills/project/handlers_test.go
Normal file
419
internal/skills/project/handlers_test.go
Normal file
@@ -0,0 +1,419 @@
|
|||||||
|
package project_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/mathiasbq/supervisor/internal/githubclient"
|
||||||
|
"github.com/mathiasbq/supervisor/internal/mcpclient"
|
||||||
|
"github.com/mathiasbq/supervisor/internal/skills/project"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
// fakeGitHub is a stand-in for the GitHub API that records the decoded JSON
// body of every request it receives (the tests point POST /user/repos at it).
type fakeGitHub struct {
	mu sync.Mutex // guards Calls and ReturnError

	// Calls holds the decoded request bodies, in arrival order.
	Calls []map[string]any
	// ReturnError selects the reply: 0 = 201 Created, 422 = already exists,
	// any other value is used verbatim as the HTTP status.
	ReturnError int
}

// handler returns the HTTP handler backing the fake: it records the request
// body, then answers according to ReturnError.
func (g *fakeGitHub) handler() http.Handler {
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		var payload map[string]any
		_ = json.NewDecoder(r.Body).Decode(&payload)

		g.mu.Lock()
		g.Calls = append(g.Calls, payload)
		status := g.ReturnError
		g.mu.Unlock()

		reply := `{"message":"boom"}`
		if status == 0 {
			status = http.StatusCreated
			reply = `{"full_name":"mathiasb/x","html_url":"https://github.com/mathiasb/x","clone_url":"https://github.com/mathiasb/x.git"}`
		} else if status == 422 {
			reply = `{"errors":[{"message":"name already exists on this account"}]}`
		}
		w.WriteHeader(status)
		_, _ = w.Write([]byte(reply))
	})
}
|
||||||
|
|
||||||
|
// fakeGiteaMCP implements just enough of the JSON-RPC tools/call surface
// to drive project_create end-to-end without an actual gitea-mcp server.
type fakeGiteaMCP struct {
	mu sync.Mutex // guards the fields below while a request is handled

	// Calls lists every tool invocation received, in order.
	Calls []recordedCall
	// Responses maps a tool name to the value returned as its result; tools
	// without an entry get a generic success object.
	Responses map[string]any
	// Errors maps a tool name to the JSON-RPC error to return instead; it
	// takes precedence over Responses.
	Errors map[string]rpcErr
}

// rpcErr is the JSON-RPC error a test wants the fake to return for a tool.
type rpcErr struct {
	Code    int
	Message string
}

// recordedCall is one tools/call invocation as seen by the fake.
type recordedCall struct {
	Tool string
	Args map[string]any
}

// handler returns the HTTP handler backing the fake. For each request it
// decodes the tool name and arguments, records the call, and replies with
// either the configured JSON-RPC error or a result whose content[0].text is
// the JSON-encoded response for that tool. Decode errors are ignored.
func (f *fakeGiteaMCP) handler() http.Handler {
	serve := func(w http.ResponseWriter, r *http.Request) {
		var envelope struct {
			ID     int             `json:"id"`
			Params json.RawMessage `json:"params"`
		}
		_ = json.NewDecoder(r.Body).Decode(&envelope)

		var call struct {
			Name      string          `json:"name"`
			Arguments json.RawMessage `json:"arguments"`
		}
		_ = json.Unmarshal(envelope.Params, &call)

		var toolArgs map[string]any
		_ = json.Unmarshal(call.Arguments, &toolArgs)

		f.mu.Lock()
		f.Calls = append(f.Calls, recordedCall{Tool: call.Name, Args: toolArgs})
		failure, shouldFail := f.Errors[call.Name]
		canned, hasCanned := f.Responses[call.Name]
		f.mu.Unlock()
		if !hasCanned {
			canned = map[string]any{"html_url": "http://gitea.example/" + call.Name}
		}

		w.Header().Set("Content-Type", "application/json")

		reply := map[string]any{"jsonrpc": "2.0", "id": envelope.ID}
		if shouldFail {
			reply["error"] = map[string]any{"code": failure.Code, "message": failure.Message}
		} else {
			text, _ := json.Marshal(canned)
			reply["result"] = map[string]any{
				"content": []map[string]any{{"type": "text", "text": string(text)}},
			}
		}
		payload, _ := json.Marshal(reply)
		_, _ = w.Write(payload)
	}
	return http.HandlerFunc(serve)
}
|
||||||
|
|
||||||
|
func newSkill(t *testing.T, f *fakeGiteaMCP) (*project.Skill, *fakeGitHub) {
|
||||||
|
t.Helper()
|
||||||
|
srv := httptest.NewServer(f.handler())
|
||||||
|
t.Cleanup(srv.Close)
|
||||||
|
|
||||||
|
gh := &fakeGitHub{}
|
||||||
|
ghSrv := httptest.NewServer(gh.handler())
|
||||||
|
t.Cleanup(ghSrv.Close)
|
||||||
|
|
||||||
|
return project.New(project.Config{
|
||||||
|
Client: mustClient(t, srv.URL),
|
||||||
|
GitHub: githubclient.New("ghp_test").WithBaseURL(ghSrv.URL),
|
||||||
|
GiteaOwner: "mathias",
|
||||||
|
GitHubOwner: "mathiasb",
|
||||||
|
GitHubPAT: "ghp_test",
|
||||||
|
InfraRepo: "infra",
|
||||||
|
}), gh
|
||||||
|
}
|
||||||
|
|
||||||
|
// newSkillNoGitHub builds a skill with the GitHub client unset — degraded
|
||||||
|
// mode where the github-repo-creation step is skipped.
|
||||||
|
func newSkillNoGitHub(t *testing.T, f *fakeGiteaMCP) *project.Skill {
|
||||||
|
t.Helper()
|
||||||
|
srv := httptest.NewServer(f.handler())
|
||||||
|
t.Cleanup(srv.Close)
|
||||||
|
return project.New(project.Config{
|
||||||
|
Client: mustClient(t, srv.URL),
|
||||||
|
GiteaOwner: "mathias",
|
||||||
|
GitHubOwner: "mathiasb",
|
||||||
|
InfraRepo: "infra",
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// mustClient builds an mcpclient against an httptest server. Uses a
|
||||||
|
// non-empty dummy token because httptest servers don't enforce bearer
|
||||||
|
// auth, but mcpclient.New now requires non-empty token (see #13).
|
||||||
|
func mustClient(t *testing.T, url string) *mcpclient.Client {
|
||||||
|
t.Helper()
|
||||||
|
c, err := mcpclient.New(url, "test-token")
|
||||||
|
require.NoError(t, err)
|
||||||
|
return c
|
||||||
|
}
|
||||||
|
|
||||||
|
// happyArgs returns the minimal valid request. With the Gitea-as-true-master
|
||||||
|
// ADR shipped, this defaults to Gitea-only (mirror_to_github omitted = false).
|
||||||
|
// Tests that need the full Gitea + GitHub mirror flow use mirroredArgs().
|
||||||
|
func happyArgs() json.RawMessage {
|
||||||
|
return json.RawMessage(`{
|
||||||
|
"name":"my-experiment",
|
||||||
|
"description":"One-line desc",
|
||||||
|
"hypothesis":"We believe X produces Y",
|
||||||
|
"folder":"AGENTS",
|
||||||
|
"stack":"go-agent",
|
||||||
|
"private":true
|
||||||
|
}`)
|
||||||
|
}
|
||||||
|
|
||||||
|
// mirroredArgs is happyArgs + mirror_to_github=true — the explicit opt-in
|
||||||
|
// path. Equivalent to the pre-ADR default.
|
||||||
|
func mirroredArgs() json.RawMessage {
|
||||||
|
return json.RawMessage(`{
|
||||||
|
"name":"my-experiment",
|
||||||
|
"description":"One-line desc",
|
||||||
|
"hypothesis":"We believe X produces Y",
|
||||||
|
"folder":"AGENTS",
|
||||||
|
"stack":"go-agent",
|
||||||
|
"private":true,
|
||||||
|
"mirror_to_github":true
|
||||||
|
}`)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestProjectCreate_HappyPath(t *testing.T) {
|
||||||
|
f := &fakeGiteaMCP{
|
||||||
|
Responses: map[string]any{
|
||||||
|
"issue_create": map[string]any{"html_url": "http://gitea.d-ma.be/mathias/my-experiment/issues/1"},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
skill, gh := newSkill(t, f)
|
||||||
|
|
||||||
|
out, err := skill.Handle(context.Background(), "project_create", mirroredArgs())
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
var res map[string]any
|
||||||
|
require.NoError(t, json.Unmarshal(out, &res))
|
||||||
|
assert.Equal(t, "http://gitea.d-ma.be/mathias/my-experiment", res["gitea_url"])
|
||||||
|
assert.Equal(t, "https://github.com/mathiasb/my-experiment", res["github_url"])
|
||||||
|
assert.Equal(t, "http://gitea.d-ma.be/mathias/my-experiment/issues/1", res["issue_url"])
|
||||||
|
assert.Contains(t, res["next_steps"], "cd ~/dev/AGENTS/my-experiment")
|
||||||
|
assert.Contains(t, res["next_steps"], "git remote add origin")
|
||||||
|
|
||||||
|
// All 4 gitea-mcp calls in order.
|
||||||
|
require.Len(t, f.Calls, 4)
|
||||||
|
assert.Equal(t, "create_project_from_template", f.Calls[0].Tool)
|
||||||
|
assert.Equal(t, "repo_mirror_push", f.Calls[1].Tool)
|
||||||
|
assert.Equal(t, "file_write_branch", f.Calls[2].Tool)
|
||||||
|
assert.Equal(t, "issue_create", f.Calls[3].Tool)
|
||||||
|
|
||||||
|
// GitHub repo created between create_project_from_template and mirror.
|
||||||
|
require.Len(t, gh.Calls, 1)
|
||||||
|
assert.Equal(t, "my-experiment", gh.Calls[0]["name"])
|
||||||
|
assert.Equal(t, true, gh.Calls[0]["private"])
|
||||||
|
assert.Equal(t, false, gh.Calls[0]["auto_init"])
|
||||||
|
|
||||||
|
// template selection wired from stack
|
||||||
|
assert.Equal(t, "template-go-agent", f.Calls[0].Args["template_name"])
|
||||||
|
// mirror config
|
||||||
|
assert.Equal(t, "add", f.Calls[1].Args["action"])
|
||||||
|
assert.Equal(t, "https://github.com/mathiasb/my-experiment.git", f.Calls[1].Args["remote_address"])
|
||||||
|
assert.Equal(t, "ghp_test", f.Calls[1].Args["remote_password"])
|
||||||
|
// infra commit path
|
||||||
|
assert.Equal(t, "k3s/staging/my-experiment/namespace.yaml", f.Calls[2].Args["path"])
|
||||||
|
assert.Contains(t, f.Calls[2].Args["content"], "name: staging-my-experiment")
|
||||||
|
assert.Contains(t, f.Calls[2].Args["content"], "managed-by: hyperguild")
|
||||||
|
// PAT must NOT appear in the response
|
||||||
|
assert.NotContains(t, string(out), "ghp_test")
|
||||||
|
|
||||||
|
// reached records the github step too.
|
||||||
|
reached := res["reached"].([]any)
|
||||||
|
assert.Equal(t, []any{"create_repo", "create_github_repo", "mirror", "infra_commit", "issue"}, reached)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestProjectCreate_GitHubExists_Idempotent(t *testing.T) {
|
||||||
|
f := &fakeGiteaMCP{
|
||||||
|
Responses: map[string]any{
|
||||||
|
"issue_create": map[string]any{"html_url": "http://gitea.d-ma.be/mathias/my-experiment/issues/1"},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
skill, gh := newSkill(t, f)
|
||||||
|
gh.ReturnError = 422 // already exists
|
||||||
|
|
||||||
|
_, err := skill.Handle(context.Background(), "project_create", mirroredArgs())
|
||||||
|
require.NoError(t, err, "422 already-exists should be idempotent")
|
||||||
|
require.Len(t, f.Calls, 4, "all gitea steps still run despite github 422")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestProjectCreate_GitHubFails(t *testing.T) {
|
||||||
|
f := &fakeGiteaMCP{}
|
||||||
|
skill, gh := newSkill(t, f)
|
||||||
|
gh.ReturnError = 401 // bad PAT
|
||||||
|
|
||||||
|
out, err := skill.Handle(context.Background(), "project_create", mirroredArgs())
|
||||||
|
require.Error(t, err)
|
||||||
|
var res map[string]any
|
||||||
|
require.NoError(t, json.Unmarshal(out, &res))
|
||||||
|
assert.Equal(t, "create_github_repo", res["failed_step"])
|
||||||
|
assert.Equal(t, []any{"create_repo"}, res["reached"])
|
||||||
|
require.Len(t, f.Calls, 1, "mirror + later steps must not run when github creation fails")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestProjectCreate_NoGitHubClient_DegradedMode(t *testing.T) {
|
||||||
|
f := &fakeGiteaMCP{
|
||||||
|
Responses: map[string]any{
|
||||||
|
"issue_create": map[string]any{"html_url": "http://gitea.d-ma.be/mathias/my-experiment/issues/1"},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
skill := newSkillNoGitHub(t, f)
|
||||||
|
|
||||||
|
// Use mirroredArgs so we exercise the GitHub-mirror path. With the
|
||||||
|
// GitHub client nil, the create_github_repo step is skipped but the
|
||||||
|
// mirror step still attempts to configure the push-mirror remote
|
||||||
|
// (degraded mode preserves the prior contract for opted-in projects).
|
||||||
|
out, err := skill.Handle(context.Background(), "project_create", mirroredArgs())
|
||||||
|
require.NoError(t, err)
|
||||||
|
var res map[string]any
|
||||||
|
require.NoError(t, json.Unmarshal(out, &res))
|
||||||
|
// reached does NOT include create_github_repo when client is nil.
|
||||||
|
reached := res["reached"].([]any)
|
||||||
|
assert.Equal(t, []any{"create_repo", "mirror", "infra_commit", "issue"}, reached)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestProjectCreate_Idempotent_RepoExists(t *testing.T) {
|
||||||
|
f := &fakeGiteaMCP{
|
||||||
|
Errors: map[string]rpcErr{
|
||||||
|
"create_project_from_template": {Code: -32003, Message: "already exists"},
|
||||||
|
},
|
||||||
|
Responses: map[string]any{
|
||||||
|
"issue_create": map[string]any{"html_url": "http://gitea.d-ma.be/mathias/my-experiment/issues/1"},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
skill, _ := newSkill(t, f)
|
||||||
|
|
||||||
|
out, err := skill.Handle(context.Background(), "project_create", mirroredArgs())
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
var res map[string]any
|
||||||
|
require.NoError(t, json.Unmarshal(out, &res))
|
||||||
|
assert.Equal(t, "http://gitea.d-ma.be/mathias/my-experiment", res["gitea_url"])
|
||||||
|
assert.Equal(t, "http://gitea.d-ma.be/mathias/my-experiment/issues/1", res["issue_url"])
|
||||||
|
|
||||||
|
// Still ran all 4 gitea-mcp steps; idempotent flow falls through.
|
||||||
|
require.Len(t, f.Calls, 4)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestProjectCreate_MirrorFails(t *testing.T) {
|
||||||
|
f := &fakeGiteaMCP{
|
||||||
|
Errors: map[string]rpcErr{
|
||||||
|
"repo_mirror_push": {Code: -32000, Message: "github unreachable"},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
skill, _ := newSkill(t, f)
|
||||||
|
|
||||||
|
out, err := skill.Handle(context.Background(), "project_create", mirroredArgs())
|
||||||
|
require.Error(t, err)
|
||||||
|
assert.Contains(t, err.Error(), `"mirror" failed`)
|
||||||
|
|
||||||
|
var res map[string]any
|
||||||
|
require.NoError(t, json.Unmarshal(out, &res))
|
||||||
|
assert.Equal(t, "mirror", res["failed_step"])
|
||||||
|
reached := res["reached"].([]any)
|
||||||
|
assert.Equal(t, []any{"create_repo", "create_github_repo"}, reached)
|
||||||
|
|
||||||
|
// Steps 1 (create) + 2 (mirror attempt) reached gitea; github made 1 call.
|
||||||
|
require.Len(t, f.Calls, 2)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestProjectCreate_InfraCommitFails(t *testing.T) {
|
||||||
|
f := &fakeGiteaMCP{
|
||||||
|
Errors: map[string]rpcErr{
|
||||||
|
"file_write_branch": {Code: -32000, Message: "write rejected"},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
skill, _ := newSkill(t, f)
|
||||||
|
|
||||||
|
out, err := skill.Handle(context.Background(), "project_create", mirroredArgs())
|
||||||
|
require.Error(t, err)
|
||||||
|
|
||||||
|
var res map[string]any
|
||||||
|
require.NoError(t, json.Unmarshal(out, &res))
|
||||||
|
assert.Equal(t, "infra_commit", res["failed_step"])
|
||||||
|
reached := res["reached"].([]any)
|
||||||
|
assert.Equal(t, []any{"create_repo", "create_github_repo", "mirror"}, reached)
|
||||||
|
require.Len(t, f.Calls, 3)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestProjectCreate_ValidationErrors(t *testing.T) {
|
||||||
|
f := &fakeGiteaMCP{}
|
||||||
|
skill, _ := newSkill(t, f)
|
||||||
|
cases := []struct {
|
||||||
|
name string
|
||||||
|
body string
|
||||||
|
want string
|
||||||
|
}{
|
||||||
|
{"missing name", `{"description":"d","hypothesis":"h","stack":"go-agent"}`, "name"},
|
||||||
|
{"missing description", `{"name":"x","hypothesis":"h","stack":"go-agent"}`, "description"},
|
||||||
|
{"missing hypothesis", `{"name":"x","description":"d","stack":"go-agent"}`, "hypothesis"},
|
||||||
|
{"bad stack", `{"name":"x","description":"d","hypothesis":"h","stack":"python"}`, "stack"},
|
||||||
|
}
|
||||||
|
for _, tc := range cases {
|
||||||
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
|
_, err := skill.Handle(context.Background(), "project_create", json.RawMessage(tc.body))
|
||||||
|
require.Error(t, err)
|
||||||
|
assert.True(t, strings.Contains(err.Error(), tc.want), "want %q in %v", tc.want, err)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
assert.Empty(t, f.Calls, "no upstream calls should occur on validation failure")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestProjectCreate_DefaultSkipsGitHubMirror(t *testing.T) {
|
||||||
|
// Default (mirror_to_github omitted) skips create_github_repo + mirror
|
||||||
|
// per the Gitea-as-true-master ADR. Gitea repo + staging namespace
|
||||||
|
// + issue still run; github_url is empty in the response.
|
||||||
|
f := &fakeGiteaMCP{
|
||||||
|
Responses: map[string]any{
|
||||||
|
"issue_create": map[string]any{"html_url": "http://gitea.d-ma.be/mathias/my-experiment/issues/1"},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
skill, gh := newSkill(t, f)
|
||||||
|
|
||||||
|
out, err := skill.Handle(context.Background(), "project_create", happyArgs())
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
var res map[string]any
|
||||||
|
require.NoError(t, json.Unmarshal(out, &res))
|
||||||
|
|
||||||
|
assert.Equal(t, "http://gitea.d-ma.be/mathias/my-experiment", res["gitea_url"])
|
||||||
|
assert.Equal(t, "", res["github_url"], "github_url must be empty when mirror not opted in")
|
||||||
|
assert.Equal(t, "http://gitea.d-ma.be/mathias/my-experiment/issues/1", res["issue_url"])
|
||||||
|
|
||||||
|
// 3 gitea-mcp calls: template create, staging file write, issue. NO mirror call.
|
||||||
|
require.Len(t, f.Calls, 3)
|
||||||
|
assert.Equal(t, "create_project_from_template", f.Calls[0].Tool)
|
||||||
|
assert.Equal(t, "file_write_branch", f.Calls[1].Tool)
|
||||||
|
assert.Equal(t, "issue_create", f.Calls[2].Tool)
|
||||||
|
|
||||||
|
// Zero GitHub API calls.
|
||||||
|
assert.Empty(t, gh.Calls, "no GitHub repo created when mirror_to_github is false")
|
||||||
|
|
||||||
|
// reached lists the Gitea-only path.
|
||||||
|
reached := res["reached"].([]any)
|
||||||
|
assert.Equal(t, []any{"create_repo", "infra_commit", "issue"}, reached)
|
||||||
|
|
||||||
|
// experiment-brief body reflects Gitea-only provisioning.
|
||||||
|
require.Contains(t, f.Calls[2].Args["body"], "Gitea-only")
|
||||||
|
require.NotContains(t, f.Calls[2].Args["body"], "Push-mirror configured")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestProjectCreate_UnknownTool(t *testing.T) {
|
||||||
|
f := &fakeGiteaMCP{}
|
||||||
|
skill, _ := newSkill(t, f)
|
||||||
|
_, err := skill.Handle(context.Background(), "nope", happyArgs())
|
||||||
|
require.Error(t, err)
|
||||||
|
}
|
||||||
109
internal/skills/project/skill.go
Normal file
109
internal/skills/project/skill.go
Normal file
@@ -0,0 +1,109 @@
|
|||||||
|
// Package project implements the `project_create` MCP tool: a single-call
|
||||||
|
// pipeline that creates a Gitea repo from a template, configures push-mirror
|
||||||
|
// to GitHub, commits a staging namespace manifest to the infra repo, and
|
||||||
|
// opens an experiment-brief issue on the new repo. See hyperguild gitea
|
||||||
|
// issue #10 for the design.
|
||||||
|
package project
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
|
||||||
|
"github.com/mathiasbq/supervisor/internal/githubclient"
|
||||||
|
"github.com/mathiasbq/supervisor/internal/mcpclient"
|
||||||
|
"github.com/mathiasbq/supervisor/internal/registry"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Config holds the orchestration dependencies for the project skill.
|
||||||
|
type Config struct {
|
||||||
|
// Client talks to the gitea-mcp server. project_create makes
|
||||||
|
// sequential calls (create_project_from_template, repo_mirror_push,
|
||||||
|
// file_write_branch, issue_create) through this client.
|
||||||
|
Client *mcpclient.Client
|
||||||
|
|
||||||
|
// GitHub is the client used to create the empty destination repo on
|
||||||
|
// GitHub before the push-mirror is configured. Gitea's push-mirror
|
||||||
|
// cannot push to a non-existent remote, so this step is mandatory
|
||||||
|
// when GitHubPAT is set. Pass nil to skip github repo creation
|
||||||
|
// entirely (degraded mode — mirror config will land but the actual
|
||||||
|
// sync to github will fail until the repo exists).
|
||||||
|
GitHub *githubclient.Client
|
||||||
|
|
||||||
|
// GiteaOwner is the org/user that owns the new repo and the infra repo
|
||||||
|
// the namespace manifest is committed to (typically "mathias").
|
||||||
|
GiteaOwner string
|
||||||
|
|
||||||
|
// GitHubOwner is the GitHub org/user the push-mirror targets
|
||||||
|
// (typically "mathiasb").
|
||||||
|
GitHubOwner string
|
||||||
|
|
||||||
|
// GitHubPAT is the personal access token used as the push-mirror
|
||||||
|
// password and to create the destination repo on GitHub. Must have
|
||||||
|
// `repo` scope. Never logged.
|
||||||
|
GitHubPAT string
|
||||||
|
|
||||||
|
// InfraRepo is the name of the infra repo on Gitea where the
|
||||||
|
// k3s/staging/<name>/namespace.yaml manifest gets committed
|
||||||
|
// (typically "infra").
|
||||||
|
InfraRepo string
|
||||||
|
}
|
||||||
|
|
||||||
|
// Skill exposes project_create as an MCP tool.
|
||||||
|
type Skill struct{ cfg Config }
|
||||||
|
|
||||||
|
// New constructs the project Skill.
|
||||||
|
func New(cfg Config) *Skill { return &Skill{cfg: cfg} }
|
||||||
|
|
||||||
|
// Name returns the skill identifier.
|
||||||
|
func (s *Skill) Name() string { return "project" }
|
||||||
|
|
||||||
|
// Tools returns the MCP tool definitions for this skill.
|
||||||
|
func (s *Skill) Tools() []registry.ToolDef {
|
||||||
|
schema, _ := json.Marshal(map[string]any{
|
||||||
|
"type": "object",
|
||||||
|
"properties": map[string]any{
|
||||||
|
"name": map[string]any{
|
||||||
|
"type": "string",
|
||||||
|
"pattern": `^[a-z][a-z0-9-]{1,38}[a-z0-9]$`,
|
||||||
|
"description": "Lowercase repo name. 3-40 chars, must start with a letter.",
|
||||||
|
},
|
||||||
|
"description": map[string]any{"type": "string"},
|
||||||
|
"hypothesis": map[string]any{"type": "string"},
|
||||||
|
"folder": map[string]any{
|
||||||
|
"type": "string",
|
||||||
|
"description": "Informational only — appears in next_steps. Example: AGENTS, AI, QKX.",
|
||||||
|
},
|
||||||
|
"stack": map[string]any{
|
||||||
|
"type": "string",
|
||||||
|
"enum": []string{"go-agent", "go-web"},
|
||||||
|
"description": "Selects template-go-agent or template-go-web.",
|
||||||
|
},
|
||||||
|
"private": map[string]any{"type": "boolean"},
|
||||||
|
"mirror_to_github": map[string]any{
|
||||||
|
"type": "boolean",
|
||||||
|
"description": "Default false. When true, also create an empty GitHub repo " +
|
||||||
|
"and configure a push-mirror from Gitea. Opt-in per the Gitea-as-true-master " +
|
||||||
|
"ADR — only set true for open-source projects (hyperguild, gitea-mcp, template-*). " +
|
||||||
|
"Never set true for client projects, business logic, or personal experiments.",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"required": []string{"name", "description", "hypothesis", "stack"},
|
||||||
|
})
|
||||||
|
return []registry.ToolDef{
|
||||||
|
{
|
||||||
|
Name: "project_create",
|
||||||
|
Description: "Bootstrap a new project: Gitea repo from template, staging namespace manifest, " +
|
||||||
|
"experiment-brief issue. Optionally mirrors to GitHub when `mirror_to_github: true` " +
|
||||||
|
"(default false). Idempotent — re-running with an existing repo returns the existing URLs.",
|
||||||
|
InputSchema: schema,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handle dispatches the tool call.
|
||||||
|
func (s *Skill) Handle(ctx context.Context, tool string, args json.RawMessage) (json.RawMessage, error) {
|
||||||
|
if tool != "project_create" {
|
||||||
|
return nil, errUnknownTool(tool)
|
||||||
|
}
|
||||||
|
return s.handleCreate(ctx, args)
|
||||||
|
}
|
||||||
@@ -1,87 +0,0 @@
|
|||||||
// internal/skills/spec/handlers.go
|
|
||||||
package spec
|
|
||||||
|
|
||||||
import (
|
|
||||||
"context"
|
|
||||||
"encoding/json"
|
|
||||||
"fmt"
|
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/mathiasbq/supervisor/internal/brain"
|
|
||||||
"github.com/mathiasbq/supervisor/internal/session"
|
|
||||||
)
|
|
||||||
|
|
||||||
// specArgs is the JSON argument payload of the "spec" tool, as decoded by
// Handle.
type specArgs struct {
	// ProjectRoot is required; Handle rejects an empty value.
	ProjectRoot string `json:"project_root"`
	// Requirements is required; Handle rejects an empty value.
	Requirements string `json:"requirements"`
	// OutputPath falls back to "docs/spec.md" in Handle when empty.
	OutputPath string `json:"output_path"`
	// Context is optional free text included in the task.
	Context string `json:"context"`
	// Model overrides the configured default model when non-empty.
	Model string `json:"model"`
	// SessionID, when non-empty, enables session history and logging.
	SessionID string `json:"session_id"`
}
|
|
||||||
|
|
||||||
// Handle dispatches the MCP tool call to the appropriate handler.
|
|
||||||
func (s *Skill) Handle(ctx context.Context, tool string, args json.RawMessage) (json.RawMessage, error) {
|
|
||||||
if tool != "spec" {
|
|
||||||
return nil, fmt.Errorf("unknown tool: %s", tool)
|
|
||||||
}
|
|
||||||
var a specArgs
|
|
||||||
if err := json.Unmarshal(args, &a); err != nil {
|
|
||||||
return nil, fmt.Errorf("parse args: %w", err)
|
|
||||||
}
|
|
||||||
if a.ProjectRoot == "" {
|
|
||||||
return nil, fmt.Errorf("project_root is required")
|
|
||||||
}
|
|
||||||
if a.Requirements == "" {
|
|
||||||
return nil, fmt.Errorf("requirements is required")
|
|
||||||
}
|
|
||||||
outputPath := a.OutputPath
|
|
||||||
if outputPath == "" {
|
|
||||||
outputPath = "docs/spec.md"
|
|
||||||
}
|
|
||||||
|
|
||||||
model := a.Model
|
|
||||||
if model == "" {
|
|
||||||
model = s.cfg.DefaultModel
|
|
||||||
}
|
|
||||||
|
|
||||||
brainCtx, _ := brain.Query(ctx, s.cfg.IngestBaseURL, a.Requirements+" "+a.Context, 3)
|
|
||||||
|
|
||||||
task := fmt.Sprintf(
|
|
||||||
"phase: spec\nproject_root: %s\nrequirements: %s\noutput_path: %s\ncontext: %s\nmodel: %s",
|
|
||||||
a.ProjectRoot, a.Requirements, outputPath, a.Context, model,
|
|
||||||
)
|
|
||||||
task = session.PrependHistory(s.cfg.SessionsDir, a.SessionID, "spec", task)
|
|
||||||
if brainCtx != "" {
|
|
||||||
task = brainCtx + "\n---\n\n" + task
|
|
||||||
}
|
|
||||||
|
|
||||||
if s.cfg.CompleteFunc == nil {
|
|
||||||
return nil, fmt.Errorf("no executor configured")
|
|
||||||
}
|
|
||||||
t0 := time.Now()
|
|
||||||
text, dur, err := s.cfg.CompleteFunc(ctx, model, s.cfg.SkillPrompt, task)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
if a.SessionID != "" && s.cfg.SessionsDir != "" {
|
|
||||||
msg := text
|
|
||||||
if len(msg) > 200 {
|
|
||||||
msg = msg[:200]
|
|
||||||
}
|
|
||||||
_ = session.Append(s.cfg.SessionsDir, a.SessionID, session.Entry{
|
|
||||||
SessionID: a.SessionID,
|
|
||||||
Timestamp: time.Now(),
|
|
||||||
Skill: "spec",
|
|
||||||
Phase: "spec",
|
|
||||||
ProjectRoot: a.ProjectRoot,
|
|
||||||
FinalStatus: "ok",
|
|
||||||
ModelUsed: model,
|
|
||||||
DurationMs: time.Since(t0).Milliseconds(),
|
|
||||||
Message: msg,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
return json.Marshal(map[string]any{"text": text, "model": model, "duration_ms": dur})
|
|
||||||
}
|
|
||||||
@@ -1,53 +0,0 @@
|
|||||||
// internal/skills/spec/handlers_test.go
|
|
||||||
package spec_test
|
|
||||||
|
|
||||||
import (
|
|
||||||
"context"
|
|
||||||
"encoding/json"
|
|
||||||
"testing"
|
|
||||||
|
|
||||||
"github.com/mathiasbq/supervisor/internal/skills/spec"
|
|
||||||
"github.com/stretchr/testify/assert"
|
|
||||||
"github.com/stretchr/testify/require"
|
|
||||||
)
|
|
||||||
|
|
||||||
func TestSpecToolRegistered(t *testing.T) {
|
|
||||||
sk := spec.New(spec.Config{SkillPrompt: "spec rules"})
|
|
||||||
names := make([]string, 0)
|
|
||||||
for _, tool := range sk.Tools() {
|
|
||||||
names = append(names, tool.Name)
|
|
||||||
}
|
|
||||||
assert.Contains(t, names, "spec")
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestSpecRequiresProjectRoot(t *testing.T) {
|
|
||||||
sk := spec.New(spec.Config{SkillPrompt: "s"})
|
|
||||||
_, err := sk.Handle(context.Background(), "spec", json.RawMessage(`{"requirements":"add login"}`))
|
|
||||||
assert.ErrorContains(t, err, "project_root")
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestSpecRequiresRequirements(t *testing.T) {
|
|
||||||
sk := spec.New(spec.Config{SkillPrompt: "s"})
|
|
||||||
_, err := sk.Handle(context.Background(), "spec", json.RawMessage(`{"project_root":"/tmp"}`))
|
|
||||||
assert.ErrorContains(t, err, "requirements")
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestSpecCallsCompleteFunc(t *testing.T) {
|
|
||||||
var capturedTask string
|
|
||||||
fakeFn := func(_ context.Context, _, _, user string) (string, int64, error) {
|
|
||||||
capturedTask = user
|
|
||||||
return "# OAuth2 Login Spec\n\n## Overview\nImplement OAuth2 login flow.", 110, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
sk := spec.New(spec.Config{SkillPrompt: "spec rules", CompleteFunc: fakeFn, SessionsDir: t.TempDir()})
|
|
||||||
out, err := sk.Handle(context.Background(), "spec", json.RawMessage(
|
|
||||||
`{"project_root":"/tmp/proj","requirements":"add OAuth2 login","output_path":"docs/login-spec.md"}`,
|
|
||||||
))
|
|
||||||
require.NoError(t, err)
|
|
||||||
assert.Contains(t, capturedTask, "OAuth2 login")
|
|
||||||
assert.Contains(t, capturedTask, "docs/login-spec.md")
|
|
||||||
|
|
||||||
var result map[string]any
|
|
||||||
require.NoError(t, json.Unmarshal(out, &result))
|
|
||||||
assert.Contains(t, result["text"], "OAuth2 Login Spec")
|
|
||||||
}
|
|
||||||
@@ -1,56 +0,0 @@
|
|||||||
// internal/skills/spec/skill.go
|
|
||||||
package spec
|
|
||||||
|
|
||||||
import (
|
|
||||||
"context"
|
|
||||||
"encoding/json"
|
|
||||||
|
|
||||||
"github.com/mathiasbq/supervisor/internal/registry"
|
|
||||||
)
|
|
||||||
|
|
||||||
// CompleteFunc is the signature of the function that calls a local model
// with a system and a user prompt. It returns the model's text, an int64
// that callers in this package report under "duration_ms", and an error.
type CompleteFunc func(
	ctx context.Context,
	model string,
	system string,
	user string,
) (string, int64, error)
|
|
||||||
|
|
||||||
// Config holds dependencies for the spec skill.
|
|
||||||
type Config struct {
|
|
||||||
SkillPrompt string
|
|
||||||
DefaultModel string
|
|
||||||
CompleteFunc CompleteFunc
|
|
||||||
SessionsDir string
|
|
||||||
IngestBaseURL string
|
|
||||||
}
|
|
||||||
|
|
||||||
// Skill implements the spec MCP tool.
|
|
||||||
type Skill struct{ cfg Config }
|
|
||||||
|
|
||||||
// New creates a new spec Skill.
|
|
||||||
func New(cfg Config) *Skill { return &Skill{cfg: cfg} }
|
|
||||||
|
|
||||||
// Name returns the skill identifier.
|
|
||||||
func (s *Skill) Name() string { return "spec" }
|
|
||||||
|
|
||||||
// Tools returns the MCP tool definitions for this skill.
|
|
||||||
func (s *Skill) Tools() []registry.ToolDef {
|
|
||||||
schema := func(required []string, props map[string]any) json.RawMessage {
|
|
||||||
b, _ := json.Marshal(map[string]any{"type": "object", "required": required, "properties": props})
|
|
||||||
return b
|
|
||||||
}
|
|
||||||
str := map[string]any{"type": "string"}
|
|
||||||
return []registry.ToolDef{
|
|
||||||
{
|
|
||||||
Name: "spec",
|
|
||||||
Description: "Consult a local model to draft a structured implementation spec from requirements. Returns the spec text.",
|
|
||||||
InputSchema: schema(
|
|
||||||
[]string{"project_root", "requirements"},
|
|
||||||
map[string]any{
|
|
||||||
"project_root": str,
|
|
||||||
"requirements": str,
|
|
||||||
"output_path": str,
|
|
||||||
"context": str,
|
|
||||||
"model": str,
|
|
||||||
"session_id": str,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,173 +0,0 @@
|
|||||||
package tdd
|
|
||||||
|
|
||||||
import (
|
|
||||||
"context"
|
|
||||||
"encoding/json"
|
|
||||||
"fmt"
|
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/mathiasbq/supervisor/internal/brain"
|
|
||||||
"github.com/mathiasbq/supervisor/internal/session"
|
|
||||||
)
|
|
||||||
|
|
||||||
func (s *Skill) Handle(ctx context.Context, tool string, args json.RawMessage) (json.RawMessage, error) {
|
|
||||||
switch tool {
|
|
||||||
case "tdd_red":
|
|
||||||
return s.handleRed(ctx, args)
|
|
||||||
case "tdd_green":
|
|
||||||
return s.handleGreen(ctx, args)
|
|
||||||
case "tdd_refactor":
|
|
||||||
return s.handleRefactor(ctx, args)
|
|
||||||
default:
|
|
||||||
return nil, fmt.Errorf("unknown tool: %s", tool)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// redArgs is the JSON argument payload of the tdd_red tool.
type redArgs struct {
	// ProjectRoot is required; handleRed rejects an empty value.
	ProjectRoot string `json:"project_root"`
	// Spec is required; handleRed rejects an empty value.
	Spec string `json:"spec"`
	// Model overrides the configured default model when non-empty.
	Model string `json:"model"`
	// TestCmd is copied verbatim into the task text.
	TestCmd string `json:"test_cmd"`
}
|
|
||||||
|
|
||||||
func (s *Skill) handleRed(ctx context.Context, raw json.RawMessage) (json.RawMessage, error) {
|
|
||||||
var args redArgs
|
|
||||||
if err := json.Unmarshal(raw, &args); err != nil {
|
|
||||||
return nil, fmt.Errorf("parse args: %w", err)
|
|
||||||
}
|
|
||||||
if args.ProjectRoot == "" {
|
|
||||||
return nil, fmt.Errorf("project_root is required")
|
|
||||||
}
|
|
||||||
if args.Spec == "" {
|
|
||||||
return nil, fmt.Errorf("spec is required")
|
|
||||||
}
|
|
||||||
brainCtx, _ := brain.Query(ctx, s.cfg.IngestBaseURL, args.Spec, 3)
|
|
||||||
|
|
||||||
task := fmt.Sprintf(
|
|
||||||
"phase: red\nproject_root: %s\nspec: %s\nmodel: %s\ntest_cmd: %s",
|
|
||||||
args.ProjectRoot, args.Spec, s.resolveModel(args.Model), args.TestCmd,
|
|
||||||
)
|
|
||||||
if brainCtx != "" {
|
|
||||||
task = brainCtx + "\n---\n\n" + task
|
|
||||||
}
|
|
||||||
return s.complete(ctx, s.resolveModel(args.Model), task)
|
|
||||||
}
|
|
||||||
|
|
||||||
// greenArgs is the JSON argument payload of the tdd_green tool.
type greenArgs struct {
	// ProjectRoot is required; handleGreen rejects an empty value.
	ProjectRoot string `json:"project_root"`
	// TestPath is required; handleGreen rejects an empty value.
	TestPath string `json:"test_path"`
	// Model overrides the configured default model when non-empty.
	Model string `json:"model"`
	// TestCmd is copied verbatim into the task text.
	TestCmd string `json:"test_cmd"`
	// SessionID, when non-empty, enables session history and logging.
	SessionID string `json:"session_id"`
}
|
|
||||||
|
|
||||||
func (s *Skill) handleGreen(ctx context.Context, raw json.RawMessage) (json.RawMessage, error) {
|
|
||||||
var args greenArgs
|
|
||||||
if err := json.Unmarshal(raw, &args); err != nil {
|
|
||||||
return nil, fmt.Errorf("parse args: %w", err)
|
|
||||||
}
|
|
||||||
if args.ProjectRoot == "" {
|
|
||||||
return nil, fmt.Errorf("project_root is required")
|
|
||||||
}
|
|
||||||
if args.TestPath == "" {
|
|
||||||
return nil, fmt.Errorf("test_path is required")
|
|
||||||
}
|
|
||||||
task := fmt.Sprintf(
|
|
||||||
"phase: green\nproject_root: %s\ntest_path: %s\nmodel: %s\ntest_cmd: %s",
|
|
||||||
args.ProjectRoot, args.TestPath, s.resolveModel(args.Model), args.TestCmd,
|
|
||||||
)
|
|
||||||
task = session.PrependHistory(s.cfg.SessionsDir, args.SessionID, "green", task)
|
|
||||||
|
|
||||||
t0 := time.Now()
|
|
||||||
result, err := s.complete(ctx, s.resolveModel(args.Model), task)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
s.logEntry(args.SessionID, args.ProjectRoot, "tdd", "green", s.resolveModel(args.Model), t0, result)
|
|
||||||
return result, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// refactorArgs is the JSON argument payload of the tdd_refactor tool.
type refactorArgs struct {
	// ProjectRoot is required; handleRefactor rejects an empty value.
	ProjectRoot string `json:"project_root"`
	// TestPath is required; handleRefactor rejects an empty value.
	TestPath string `json:"test_path"`
	// ImplPath is required; handleRefactor rejects an empty value.
	ImplPath string `json:"impl_path"`
	// Model overrides the configured default model when non-empty.
	Model string `json:"model"`
	// TestCmd is copied verbatim into the task text.
	TestCmd string `json:"test_cmd"`
	// SessionID, when non-empty, enables session history and logging.
	SessionID string `json:"session_id"`
}
|
|
||||||
|
|
||||||
func (s *Skill) handleRefactor(ctx context.Context, raw json.RawMessage) (json.RawMessage, error) {
|
|
||||||
var args refactorArgs
|
|
||||||
if err := json.Unmarshal(raw, &args); err != nil {
|
|
||||||
return nil, fmt.Errorf("parse args: %w", err)
|
|
||||||
}
|
|
||||||
if args.ProjectRoot == "" {
|
|
||||||
return nil, fmt.Errorf("project_root is required")
|
|
||||||
}
|
|
||||||
if args.TestPath == "" {
|
|
||||||
return nil, fmt.Errorf("test_path is required")
|
|
||||||
}
|
|
||||||
if args.ImplPath == "" {
|
|
||||||
return nil, fmt.Errorf("impl_path is required")
|
|
||||||
}
|
|
||||||
task := fmt.Sprintf(
|
|
||||||
"phase: refactor\nproject_root: %s\ntest_path: %s\nimpl_path: %s\nmodel: %s\ntest_cmd: %s",
|
|
||||||
args.ProjectRoot, args.TestPath, args.ImplPath, s.resolveModel(args.Model), args.TestCmd,
|
|
||||||
)
|
|
||||||
task = session.PrependHistory(s.cfg.SessionsDir, args.SessionID, "refactor", task)
|
|
||||||
|
|
||||||
t0 := time.Now()
|
|
||||||
result, err := s.complete(ctx, s.resolveModel(args.Model), task)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
s.logEntry(args.SessionID, args.ProjectRoot, "tdd", "refactor", s.resolveModel(args.Model), t0, result)
|
|
||||||
return result, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *Skill) resolveModel(override string) string {
|
|
||||||
if override != "" {
|
|
||||||
return override
|
|
||||||
}
|
|
||||||
return s.cfg.DefaultModel
|
|
||||||
}
|
|
||||||
|
|
||||||
// complete calls CompleteFunc and returns the text as JSON.
|
|
||||||
func (s *Skill) complete(ctx context.Context, model, task string) (json.RawMessage, error) {
|
|
||||||
if s.cfg.CompleteFunc == nil {
|
|
||||||
return nil, fmt.Errorf("no executor configured")
|
|
||||||
}
|
|
||||||
text, dur, err := s.cfg.CompleteFunc(ctx, model, s.cfg.SkillPrompt, task)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
return json.Marshal(map[string]any{"text": text, "model": model, "duration_ms": dur})
|
|
||||||
}
|
|
||||||
|
|
||||||
// logEntry writes a session.Entry for a completed phase if session_id is set.
|
|
||||||
func (s *Skill) logEntry(sessionID, projectRoot, skill, phase, model string, t0 time.Time, raw json.RawMessage) {
|
|
||||||
if sessionID == "" || s.cfg.SessionsDir == "" {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
var msg string
|
|
||||||
var result struct {
|
|
||||||
Text string `json:"text"`
|
|
||||||
}
|
|
||||||
if err := json.Unmarshal(raw, &result); err == nil && len(result.Text) > 0 {
|
|
||||||
msg = result.Text
|
|
||||||
if len(msg) > 200 {
|
|
||||||
msg = msg[:200]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
_ = session.Append(s.cfg.SessionsDir, sessionID, session.Entry{
|
|
||||||
SessionID: sessionID,
|
|
||||||
Timestamp: time.Now(),
|
|
||||||
Skill: skill,
|
|
||||||
Phase: phase,
|
|
||||||
ProjectRoot: projectRoot,
|
|
||||||
FinalStatus: "ok",
|
|
||||||
ModelUsed: model,
|
|
||||||
DurationMs: time.Since(t0).Milliseconds(),
|
|
||||||
Message: msg,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
@@ -1,97 +0,0 @@
|
|||||||
package tdd_test
|
|
||||||
|
|
||||||
import (
|
|
||||||
"context"
|
|
||||||
"encoding/json"
|
|
||||||
"testing"
|
|
||||||
|
|
||||||
"github.com/mathiasbq/supervisor/internal/session"
|
|
||||||
"github.com/mathiasbq/supervisor/internal/skills/tdd"
|
|
||||||
"github.com/stretchr/testify/assert"
|
|
||||||
"github.com/stretchr/testify/require"
|
|
||||||
)
|
|
||||||
|
|
||||||
func TestTDDSkillTools(t *testing.T) {
|
|
||||||
skill := tdd.New(tdd.Config{
|
|
||||||
SkillPrompt: "tdd rules",
|
|
||||||
})
|
|
||||||
tools := skill.Tools()
|
|
||||||
names := make([]string, len(tools))
|
|
||||||
for i, tool := range tools {
|
|
||||||
names[i] = tool.Name
|
|
||||||
}
|
|
||||||
assert.ElementsMatch(t, []string{"tdd_red", "tdd_green", "tdd_refactor"}, names)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestTDDSkillHandleUnknown(t *testing.T) {
|
|
||||||
skill := tdd.New(tdd.Config{SkillPrompt: "t"})
|
|
||||||
_, err := skill.Handle(context.Background(), "tdd_unknown", json.RawMessage(`{}`))
|
|
||||||
assert.ErrorContains(t, err, "unknown tool")
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestTDDRedRequiresProjectRoot(t *testing.T) {
|
|
||||||
skill := tdd.New(tdd.Config{SkillPrompt: "t"})
|
|
||||||
_, err := skill.Handle(context.Background(), "tdd_red", json.RawMessage(`{"spec":"add two numbers"}`))
|
|
||||||
assert.ErrorContains(t, err, "project_root")
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestTDDRedRequiresSpec(t *testing.T) {
|
|
||||||
skill := tdd.New(tdd.Config{SkillPrompt: "t"})
|
|
||||||
_, err := skill.Handle(context.Background(), "tdd_red", json.RawMessage(`{"project_root":"/tmp/proj"}`))
|
|
||||||
assert.ErrorContains(t, err, "spec")
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestTDDGreenInjectsSessionHistory(t *testing.T) {
|
|
||||||
sessDir := t.TempDir()
|
|
||||||
require.NoError(t, session.Append(sessDir, "sess-1", session.Entry{
|
|
||||||
SessionID: "sess-1", Skill: "tdd", Phase: "red", FinalStatus: "pass",
|
|
||||||
FilePath: "internal/foo/foo_test.go",
|
|
||||||
Message: "wrote failing test for Foo",
|
|
||||||
}))
|
|
||||||
|
|
||||||
var capturedTask string
|
|
||||||
fakeFn := func(_ context.Context, _, _, user string) (string, int64, error) {
|
|
||||||
capturedTask = user
|
|
||||||
return "here is my suggestion", 100, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
sk := tdd.New(tdd.Config{SkillPrompt: "tdd", CompleteFunc: fakeFn, SessionsDir: sessDir})
|
|
||||||
_, err := sk.Handle(context.Background(), "tdd_green", json.RawMessage(
|
|
||||||
`{"project_root":"/tmp","test_path":"internal/foo/foo_test.go","test_cmd":"go test ./...","session_id":"sess-1"}`,
|
|
||||||
))
|
|
||||||
require.NoError(t, err)
|
|
||||||
assert.Contains(t, capturedTask, "## Session history")
|
|
||||||
assert.Contains(t, capturedTask, "wrote failing test for Foo")
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestTDDGreenNoHistoryWhenSessionIDEmpty(t *testing.T) {
|
|
||||||
var capturedTask string
|
|
||||||
fakeFn := func(_ context.Context, _, _, user string) (string, int64, error) {
|
|
||||||
capturedTask = user
|
|
||||||
return "suggestion", 50, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
sk := tdd.New(tdd.Config{SkillPrompt: "tdd", CompleteFunc: fakeFn, SessionsDir: t.TempDir()})
|
|
||||||
_, err := sk.Handle(context.Background(), "tdd_green", json.RawMessage(
|
|
||||||
`{"project_root":"/tmp","test_path":"internal/foo/foo_test.go"}`,
|
|
||||||
))
|
|
||||||
require.NoError(t, err)
|
|
||||||
assert.NotContains(t, capturedTask, "## Session history")
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestTDDGreenReturnsTextJSON(t *testing.T) {
|
|
||||||
fakeFn := func(_ context.Context, _, _, _ string) (string, int64, error) {
|
|
||||||
return "write a func that adds two ints", 42, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
sk := tdd.New(tdd.Config{SkillPrompt: "tdd", CompleteFunc: fakeFn})
|
|
||||||
raw, err := sk.Handle(context.Background(), "tdd_green", json.RawMessage(
|
|
||||||
`{"project_root":"/tmp","test_path":"foo_test.go"}`,
|
|
||||||
))
|
|
||||||
require.NoError(t, err)
|
|
||||||
|
|
||||||
var result map[string]any
|
|
||||||
require.NoError(t, json.Unmarshal(raw, &result))
|
|
||||||
assert.Equal(t, "write a func that adds two ints", result["text"])
|
|
||||||
assert.Equal(t, float64(42), result["duration_ms"])
|
|
||||||
}
|
|
||||||
@@ -1,86 +0,0 @@
|
|||||||
package tdd
|
|
||||||
|
|
||||||
import (
|
|
||||||
"context"
|
|
||||||
"encoding/json"
|
|
||||||
|
|
||||||
"github.com/mathiasbq/supervisor/internal/registry"
|
|
||||||
)
|
|
||||||
|
|
||||||
// CompleteFunc is the function used to call a local model.
//
// It receives the model name together with the system and user prompts and
// returns the completion text, an int64 that Skill.complete reports as
// "duration_ms", and an error.
type CompleteFunc func(ctx context.Context, model, system, user string) (string, int64, error)
|
|
||||||
|
|
||||||
// Config holds the settings a Skill is constructed with.
type Config struct {
	SkillPrompt   string       // passed to CompleteFunc as the system prompt
	CompleteFunc  CompleteFunc // nil = no executor (tests that don't reach execute())
	DefaultModel  string       // model used when a call carries no model override
	SessionsDir   string       // optional: path to brain/sessions/ for history injection
	IngestBaseURL string       // optional: base URL of ingestion server for brain context
}
|
|
||||||
|
|
||||||
// Skill is the "tdd" skill. It exposes the tdd_red, tdd_green and
// tdd_refactor tools and keeps the Config it was created with.
type Skill struct {
	cfg Config
}
|
|
||||||
|
|
||||||
func New(cfg Config) *Skill {
|
|
||||||
return &Skill{cfg: cfg}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Name returns the skill's identifier, "tdd".
func (s *Skill) Name() string {
	const name = "tdd"
	return name
}
|
|
||||||
|
|
||||||
func (s *Skill) Tools() []registry.ToolDef {
|
|
||||||
schema := func(required []string, props map[string]any) json.RawMessage {
|
|
||||||
b, _ := json.Marshal(map[string]any{
|
|
||||||
"type": "object",
|
|
||||||
"required": required,
|
|
||||||
"properties": props,
|
|
||||||
})
|
|
||||||
return b
|
|
||||||
}
|
|
||||||
strProp := map[string]any{"type": "string"}
|
|
||||||
|
|
||||||
return []registry.ToolDef{
|
|
||||||
{
|
|
||||||
Name: "tdd_red",
|
|
||||||
Description: "Consult a local model for help writing a failing test for the described behavior.",
|
|
||||||
InputSchema: schema(
|
|
||||||
[]string{"project_root", "spec"},
|
|
||||||
map[string]any{
|
|
||||||
"project_root": strProp,
|
|
||||||
"spec": strProp,
|
|
||||||
"model": strProp,
|
|
||||||
"test_cmd": strProp,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
},
|
|
||||||
{
|
|
||||||
Name: "tdd_green",
|
|
||||||
Description: "Consult a local model for implementation ideas to make the test at test_path pass.",
|
|
||||||
InputSchema: schema(
|
|
||||||
[]string{"project_root", "test_path"},
|
|
||||||
map[string]any{
|
|
||||||
"project_root": strProp,
|
|
||||||
"test_path": strProp,
|
|
||||||
"model": strProp,
|
|
||||||
"test_cmd": strProp,
|
|
||||||
"session_id": strProp,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
},
|
|
||||||
{
|
|
||||||
Name: "tdd_refactor",
|
|
||||||
Description: "Consult a local model for refactoring suggestions for impl_path while keeping tests green.",
|
|
||||||
InputSchema: schema(
|
|
||||||
[]string{"project_root", "test_path", "impl_path"},
|
|
||||||
map[string]any{
|
|
||||||
"project_root": strProp,
|
|
||||||
"test_path": strProp,
|
|
||||||
"impl_path": strProp,
|
|
||||||
"model": strProp,
|
|
||||||
"test_cmd": strProp,
|
|
||||||
"session_id": strProp,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
46
scripts/brain-embeddings-init.sql
Normal file
46
scripts/brain-embeddings-init.sql
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
-- One-time DBA setup for the brain vector store on postgres18.
--
-- Creates the `brain` database, the `brain_app` role, and the pgvector
-- extension. The ingestion service connects as brain_app and creates
-- the table + HNSW index idempotently at startup (see
-- internal/vectorstore.PGStore.Init).
--
-- The script needs the psql variable `password`; it is referenced below as
-- :'password'. Pass the raw value: the :'name' form already quotes the value
-- as an SQL literal, so wrapping it in extra single quotes would make those
-- quotes part of the stored password.
--
-- Run from koala as the postgres superuser (assumes the script has been
-- copied to /tmp inside the pod):
--
--   PASSWORD='<sops-generated>'
--   kubectl exec -n databases postgres18-0 -- \
--     psql -U postgres -v password="$PASSWORD" \
--       -f /tmp/brain-embeddings-init.sql
--
-- Or stream the script from the local checkout:
--
--   PASSWORD='<sops-generated>'
--   kubectl exec -i -n databases postgres18-0 -- \
--     psql -U postgres -v password="$PASSWORD" \
--     < scripts/brain-embeddings-init.sql
--
-- PASSWORD is assigned on its own line because the shell expands "$PASSWORD"
-- before a `PASSWORD=... kubectl ...` prefix assignment would take effect.
--
-- Idempotent: rerunning is safe. An existing brain_app role has its password
-- reset to the supplied value.

-- Abort on the first failing statement instead of carrying on.
\set ON_ERROR_STOP on

-- CREATE DATABASE cannot run inside a DO block (transactional limitation).
-- Use \gexec to emit the statement conditionally instead.
SELECT 'CREATE DATABASE brain'
WHERE NOT EXISTS (SELECT FROM pg_database WHERE datname = 'brain')
\gexec

-- DO blocks can't see psql `:'password'` substitutions (those resolve
-- client-side). Use \if to branch at psql level instead.
SELECT EXISTS (SELECT FROM pg_roles WHERE rolname = 'brain_app') AS role_exists \gset
\if :role_exists
ALTER ROLE brain_app WITH PASSWORD :'password';
\else
CREATE ROLE brain_app LOGIN PASSWORD :'password';
\endif

GRANT ALL PRIVILEGES ON DATABASE brain TO brain_app;

-- Everything below runs inside the brain database.
\c brain

CREATE EXTENSION IF NOT EXISTS vector;
GRANT ALL ON SCHEMA public TO brain_app;
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO brain_app;
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON SEQUENCES TO brain_app;
|
||||||
135
scripts/migrate-brain-halls.sh
Executable file
135
scripts/migrate-brain-halls.sh
Executable file
@@ -0,0 +1,135 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
# migrate-brain-halls.sh — move flat brain/wiki/{concepts,entities}/ notes
|
||||||
|
# into the structured brain/wiki/<wing>/<hall>/ layout introduced by
|
||||||
|
# hyperguild#1.
|
||||||
|
#
|
||||||
|
# Reads each note's YAML frontmatter:
|
||||||
|
# type: maps to hall (decision, hypothesis, failure, source → eponymous;
|
||||||
|
# concept, entity, anything else → facts)
|
||||||
|
# domain: maps to wing (sanitised: lowercase, alphanumerics + hyphens);
|
||||||
|
# empty → "general"
|
||||||
|
#
|
||||||
|
# Dry-run by default. Pass --commit to actually move files. Idempotent:
|
||||||
|
# already-migrated notes (already under a Wing dir) are left alone.
|
||||||
|
#
|
||||||
|
# Usage:
|
||||||
|
# scripts/migrate-brain-halls.sh /path/to/brain # dry-run
|
||||||
|
# scripts/migrate-brain-halls.sh --commit /path/to/brain # apply
|
||||||
|
|
||||||
|
set -euo pipefail

# --- Argument parsing -------------------------------------------------------
# COMMIT=1 applies the moves; the default (0) only reports them.
# BRAIN is the single positional argument: the brain directory.
COMMIT=0
BRAIN=""
for arg in "$@"; do
  case "$arg" in
    --commit) COMMIT=1 ;;
    -h | --help)
      # Print the header comment of this script as the help text.
      sed -n '2,18p' "$0"
      exit 0
      ;;
    -*)
      # Reject unknown flags so that a typo such as --comit cannot be
      # mistaken for the brain directory or silently ignored.
      echo "error: unknown option: $arg" >&2
      echo "usage: $0 [--commit] <brain-dir>" >&2
      exit 2
      ;;
    *)
      # Only one brain directory is supported; do not let a later
      # positional argument silently replace an earlier one.
      if [[ -n "$BRAIN" ]]; then
        echo "error: more than one brain directory given: $BRAIN, $arg" >&2
        echo "usage: $0 [--commit] <brain-dir>" >&2
        exit 2
      fi
      BRAIN="$arg"
      ;;
  esac
done

if [[ -z "$BRAIN" ]]; then
  echo "error: brain directory required" >&2
  echo "usage: $0 [--commit] <brain-dir>" >&2
  exit 2
fi

if [[ ! -d "$BRAIN" ]]; then
  echo "error: $BRAIN is not a directory" >&2
  exit 2
fi

# A brain without a wiki/ directory has nothing to migrate; that is not an error.
WIKI="$BRAIN/wiki"
if [[ ! -d "$WIKI" ]]; then
  echo "no $WIKI/ — nothing to migrate"
  exit 0
fi
|
||||||
|
|
||||||
|
# sanitise <text>
# Prints <text> as a directory-safe slug, without a trailing newline:
# lowercased, every run of characters outside a-z0-9 replaced by one hyphen,
# and leading/trailing hyphens removed.
sanitise() {
  local lowered slug
  lowered=$(printf '%s' "$1" | tr '[:upper:]' '[:lower:]')
  slug=$(sed -E 's/[^a-z0-9]+/-/g; s/^-+//; s/-+$//; s/-+/-/g' <<<"$lowered")
  printf '%s' "$slug"
}
|
||||||
|
|
||||||
|
# extract_frontmatter_value <file> <key>
# Echoes the value (trimmed, unquoted) of `key:` from a leading YAML
# frontmatter block. Empty if absent or no frontmatter.
#
# The block only counts as frontmatter when its opening `---` fence is the
# first non-blank line of the file. A `---` further down (for example a
# Markdown horizontal rule in a note without frontmatter) is not treated as
# an opening fence. Only the first line whose key matches is used, and one
# surrounding single or double quote is stripped from each end of the value.
extract_frontmatter_value() {
  awk -v key="$2" '
    BEGIN { started = 0 }
    !started {
      # Tolerate blank lines ahead of the opening fence.
      if ($0 ~ /^[[:space:]]*$/) next
      # First content line is not a fence: the file has no frontmatter.
      if ($0 !~ /^---[[:space:]]*$/) exit
      started = 1
      next
    }
    # Closing fence: the key was not found inside the block.
    /^---[[:space:]]*$/ { exit }
    {
      idx = index($0, ":")
      if (idx == 0) next
      k = substr($0, 1, idx-1)
      v = substr($0, idx+1)
      gsub(/^[[:space:]]+|[[:space:]]+$/, "", k)
      gsub(/^[[:space:]]+|[[:space:]]+$/, "", v)
      gsub(/^["'\'']|["'\'']$/, "", v)
      if (k == key) { print v; exit }
    }
  ' "$1"
}
|
||||||
|
|
||||||
|
# hall_for_type <type>
# Prints the hall directory name for a note type. decision, hypothesis,
# failure and source (singular or plural) map to their plural hall; every
# other value, including the empty string, maps to "facts".
hall_for_type() {
  local hall="facts"
  case "$1" in
    decision | decisions) hall="decisions" ;;
    hypothesis | hypotheses) hall="hypotheses" ;;
    failure | failures) hall="failures" ;;
    source | sources) hall="sources" ;;
  esac
  printf '%s\n' "$hall"
}
|
||||||
|
|
||||||
|
# Running totals for the final summary line; incremented by migrate_source_dir.
declare -i moved=0 skipped=0
|
||||||
|
|
||||||
|
# migrate_source_dir <dir>
# Plans (and, when COMMIT=1, performs) the move of every *.md file directly
# inside <dir> to $WIKI/<wing>/<hall>/<slug>.md. The wing comes from the
# note's `domain` frontmatter (sanitised, "general" when empty), the hall from
# its `type`, and the slug from the file name.
# Globals:   WIKI, BRAIN, COMMIT (read); moved, skipped (incremented)
# Arguments: $1 - legacy source directory; a missing directory is a no-op
# Outputs:   one "move:" or "skip (target exists):" line per note on stdout
migrate_source_dir() {
  local src="$1"
  [[ -d "$src" ]] || return 0

  local f typ domain wing hall slug dest
  while IFS= read -r -d '' f; do
    typ=$(extract_frontmatter_value "$f" type)
    domain=$(extract_frontmatter_value "$f" domain)
    hall=$(hall_for_type "$typ")
    wing=$(sanitise "${domain:-general}")
    # A domain made only of punctuation sanitises to the empty string.
    wing=${wing:-general}
    slug=$(basename "$f" .md)
    dest="$WIKI/$wing/$hall/$slug.md"

    if [[ "$f" == "$dest" ]]; then
      skipped=$((skipped + 1))
      continue
    fi

    # Never overwrite a note that already sits at the destination.
    if [[ -e "$dest" ]]; then
      echo "skip (target exists): $f → $dest"
      skipped=$((skipped + 1))
      continue
    fi

    if [[ "$COMMIT" -eq 1 ]]; then
      mkdir -p "$(dirname "$dest")"
      # git -C resolves its path arguments relative to $BRAIN, so strip the
      # "$BRAIN/" prefix first. Passing the prefixed paths made git fail for
      # every relative brain directory, and the silent fallback then dropped
      # the tracked rename. Plain mv remains the fallback for brains that are
      # not git repositories and for untracked notes.
      git -C "$BRAIN" mv -- "${f#"$BRAIN"/}" "${dest#"$BRAIN"/}" 2>/dev/null \
        || mv -- "$f" "$dest"
    fi
    echo "move: $f → $dest"
    moved=$((moved + 1))
  done < <(find "$src" -maxdepth 1 -type f -name '*.md' -print0)
}
|
||||||
|
|
||||||
|
# Migrate both legacy flat directories, then print a one-line summary that
# says whether the moves were applied or only planned.
for legacy_dir in concepts entities; do
  migrate_source_dir "$WIKI/$legacy_dir"
done

echo
mode_note="dry-run — pass --commit to apply"
if [[ "$COMMIT" -eq 1 ]]; then
  mode_note="committed"
fi
echo "moved=$moved skipped=$skipped ($mode_note)"
|
||||||
Reference in New Issue
Block a user