From 7dfe8a792e6b26e42115f434710143b3ccd23216 Mon Sep 17 00:00:00 2001 From: Mathias Bergqvist Date: Tue, 19 May 2026 23:02:07 +0200 Subject: [PATCH] feat: initial scaffold with context adapters and litellm pkg Co-Authored-By: Claude Sonnet 4.6 --- .aider.conf.yml | 2 + .aider.conventions.md | 255 ++++++++++++++++++++++++++++++++++ .context/PROJECT.md | 22 +++ .context/mcp.json | 26 ++++ .context/system-prompt.txt | 262 +++++++++++++++++++++++++++++++++++ .cursorrules | 258 ++++++++++++++++++++++++++++++++++ .env.example | 9 ++ .gitignore | 11 ++ AGENTS.md | 255 ++++++++++++++++++++++++++++++++++ CLAUDE.md | 22 +++ README.md | 25 ++++ Taskfile.yml | 36 +++++ cmd/__PROJECT_NAME__/main.go | 88 ++++++++++++ go.mod | 8 ++ pkg/litellm/model.go | 250 +++++++++++++++++++++++++++++++++ pkg/litellm/telemetry.go | 71 ++++++++++ scripts/context-sync.sh | 201 +++++++++++++++++++++++++++ 17 files changed, 1801 insertions(+) create mode 100644 .aider.conf.yml create mode 100644 .aider.conventions.md create mode 100644 .context/PROJECT.md create mode 100644 .context/mcp.json create mode 100644 .context/system-prompt.txt create mode 100644 .cursorrules create mode 100644 .env.example create mode 100644 .gitignore create mode 100644 AGENTS.md create mode 100644 CLAUDE.md create mode 100644 README.md create mode 100644 Taskfile.yml create mode 100644 cmd/__PROJECT_NAME__/main.go create mode 100644 go.mod create mode 100644 pkg/litellm/model.go create mode 100644 pkg/litellm/telemetry.go create mode 100755 scripts/context-sync.sh diff --git a/.aider.conf.yml b/.aider.conf.yml new file mode 100644 index 0000000..a16f762 --- /dev/null +++ b/.aider.conf.yml @@ -0,0 +1,2 @@ +read: .aider.conventions.md +auto-commits: false diff --git a/.aider.conventions.md b/.aider.conventions.md new file mode 100644 index 0000000..42ccd32 --- /dev/null +++ b/.aider.conventions.md @@ -0,0 +1,255 @@ +# Agent context — Mathias workspace + + + +## Who I am + +I'm Mathias, a digital product manager and 
technology consultant based in Sweden. +I build software, research emerging tech, and deliver consulting engagements +for clients under NDA. I work across AI/ML, financial automation, web applications, +and climate/sustainability tech. + +## How I work with agents + +- I think like a product manager — I care about *why* before *how* +- I want agents to be opinionated and push back, not just execute blindly +- I prefer concise responses; skip ceremony and get to the point +- When I say "build this", I mean production-quality with tests, not a demo +- Ask me before making irreversible changes or adding heavy dependencies +- I work with confidential client data — never send it to cloud APIs unless I explicitly say it's OK + +## Behavior rules + +These rules apply to every task across every project, regardless of harness. + +1. **No assumptions.** Don't hide confusion — surface it. Surface tradeoffs explicitly. + Think before coding; if the problem is unclear, ask or state assumptions before acting. +2. **Minimum viable code.** Solve with the smallest change that works. Nothing + speculative, no "while we're here" cleanups, no premature abstractions. Simplicity first. +3. **Surgical changes.** Touch only what the task requires. Leave unrelated code, + files, and formatting alone. Diffs should be small and reviewable. +4. **Goal-driven execution.** Define clear success criteria up front for every task. + Loop — implement, verify, refine — until those criteria are met. Don't claim + completion without evidence (tests pass, command output, observed behavior). +5. **Trunk-Based Development — commit directly to main.** Every commit is one + logical change (one tool, one fix, one test) with passing tests. Main is always + deployable. Never create long-lived feature branches. 
+ + **Exception — parallel agents on same repo:** If another agent is known to be + actively working on the same repo simultaneously, create a short-lived branch + (`agent/<task>`), finish the task, and merge to main within the same + session. Do not leave agent branches open between sessions. + + **Exception — external contributor or client four-eyes requirement:** Use + PR flow only when a human reviewer outside the project is required. Document + the reason in PROJECT.md. + +## Default stack + +| Layer | Default | Fallback | Last resort | +|-------|---------|----------|-------------| +| Language | Go | Python | TypeScript, Java, C | +| UI | HTMX + Templ | Server-rendered HTML | React (only if SPA is justified) | +| Build | Task (taskfile.dev) | Make | — | +| Containers | Docker Compose (dev), k3s (prod) | — | — | +| DB | PostgreSQL + sqlc | SQLite | — | +| Search | pgvector (vector), BM25 | Qdrant (when >1M vectors or hybrid retrieval) | — | +| Logging | slog (structured) | — | — | +| Testing | Table-driven, testify | — | — | +| Agents (Go) | google.golang.org/adk + pkg/litellm adapter | — | — | + +Exploratory: Rust, Zig — I'll tell you when I want these. + +## Code conventions + +- **Go style**: golines, gofumpt, golangci-lint +- **Errors**: `fmt.Errorf("operation: %w", err)` — never naked, never log-and-return +- **Naming**: stdlib conventions, no stuttering +- **Architecture**: prefer stdlib over frameworks, constructor injection, env-var config parsed into typed structs +- **Git**: conventional commits (`feat:`, `fix:`, `chore:`), commit directly to main, + one logical change per commit, CI is the quality gate +- **Never**: long-lived feature branches, PRs for solo work, direct push without + passing `task check` locally first +- **Security**: no secrets in code, govulncheck before adding deps, SOPS for encrypted config +- **Dependencies**: prefer stdlib. 
testify, slog, templ, sqlc, google.golang.org/adk (agent projects only) are pre-approved; anything else needs justification in the commit message + +## Infrastructure + +Three machines on Tailscale: + +| Machine | Role | Key specs | +|---------|------|-----------| +| koala | GPU inference, heavy compute | RTX 5070, runs k3s + llama-swap + shared postgres18/pgvector | +| iguana | Services, builds | M2 Ultra Mac | +| flamingo | Daily driver, edge | Mac mini, ~/dev is here | + +- **Model routing**: LiteLLM in front of llama-swap (local) + cloud APIs (when permitted) +- **Orchestration**: k3s cluster across all three machines +- **Networking**: Tailscale mesh + +## Project landscape + +All development repos live at `~/dev/` (softlink from `~/Documents/local-dev/`). + +Organized in thematic folders: + +| Folder | Focus | Count | +|--------|-------|-------| +| `GO/` | Go web frameworks, API integrations, learning projects | ~10 | +| `AI/` | ML research, AI frameworks (FinRL, DSPy, crawl4ai) | ~6 | +| `AGENTS/` | Autonomous agents, coding agents, MCP servers, infra | ~15 | +| `QKX/` | Invoice processing, financial automation, payment systems | ~13 | +| `XT/` | Climate data, sustainability (Klimatkollen, Garbo) | ~2 | + +See `~/dev/PROJECT_SUMMARY.md` for detailed descriptions of each project. + +### Key active projects + +- **super-koala** (`AGENTS/`) — multi-component agent stack with LangGraph, DSPy, MCP +- **azure-tiger** (`QKX/`) — invoice extraction → ISO 20022 payment instructions +- **gocrwl** (`AGENTS/`) — Go web crawler with containerized deployment +- **koala-ai-stack** (`AGENTS/`) — local AI server infrastructure management +- **klimatkollen** (`XT/`) — Swedish municipal climate data platform + +## Knowledge base — actively use it + +A persistent brain (BM25 search + LLM-synthesised Q&A) survives across sessions, +hosts, and harnesses. It holds 100+ hard-won entries: infra incident postmortems, +Go pitfalls, framework gotchas, design principles, ADRs. 
**It is not optional +reference material — query it actively, not just when explicitly told.** + +### When to query (treat as a reflex) + +- **Before** starting a non-trivial task — search for prior art with the symptom + AND the system component ("how did we solve X in Y?"). 5 seconds beats 5 hours. +- **When debugging** — search for the error string, the stack frame, the affected + service. Past you may have already paid this tax. +- **Before adopting** a pattern, library, framework, or model name — check if it + was tried and rejected, or what the integration footguns are. +- **When making architectural decisions** — search for the domain + "ADR" or + "decision" to find prior reasoning before re-deriving it. +- **When a recommendation feels novel** — challenge yourself: "has this been + documented?" The brain often has it. + +### When to write + +After you discover something that **future-you would forget** and that **isn't +recoverable from the code, git log, or PR description alone**: + +- Bugs whose root cause is non-obvious and generalisable beyond this project. +- Framework / library / model-name quirks that bit you and would bite anyone. +- Design principles validated under fire (e.g. "every `_get` needs a `_list`"). +- Postmortems for incidents: what broke, why, how diagnosed, what to do next time. + +DON'T write project status, sprint progress, PR summaries, or "what I did this +session" — those rot fast and the originals are in git/gitea anyway. Brain +entries that age well are about *why*, *how to avoid*, and *what to do when*. 
+ +### How to access (per harness) + +| Harness | Query | Write | +|---------|-------|-------| +| **Claude Code, Claude Desktop** | `brain_query` (BM25), `brain_answer` (LLM-synth + sources) MCP tools | `brain_write` MCP tool | +| **Crush, Pi, Antigravity, other MCP-capable** | same MCP server: `ingestion-brain` (via the `mcp__*_brain__*` namespace once authenticated) | same | +| **Anything HTTP-only (curl, scripts)** | `POST https://brain-mcp.d-ma.be/query` with `{"query":"..."}` (auth via `BRAIN_MCP_TOKEN`) | `POST .../write` with `{"content":"...","filename":"..."}` | +| **Browser / human inspection** | `https://gitea.d-ma.be/mathias/hyperguild` → `knowledge/` and `wiki/` markdown files | — | + +- **Scoping**: defaults to `public` collection; client projects filter to `{client}` + `public`. +- **Routing**: brain_answer's LLM uses berget.ai as primary, iguana ollama as + fallback. Both are configurable in the `supervisor/ingestion-deployment.yaml` + on the koala k3s cluster; don't hardcode local-only model names into the + berget URL (see knowledge entry on namespace mismatches). + +### Quick reflex checks + +If you find yourself about to say any of these out loud, you owe yourself a brain query first: + +- "I think the issue might be..." +- "Let me try X and see..." +- "I'll just write a script to..." +- "This is probably a new bug..." +- "Has anyone done this before?" — *yes, probably, go check.* + +## Client work rules + +When working on a project tagged with a client name: +1. Never send code, data, or context to cloud APIs — use local models only +2. Never reference other client projects or their data +3. Keep all artifacts within the client's git org / directory +4. 
Treat everything as confidential unless told otherwise + +## Harness-agnostic principles + +This context is designed to work with any AI coding tool: +- Claude Code, Cursor, Aider, Open WebUI, Charmbracelet Mods/Crush +- Pi Coding Agent, Mistral Vibe, Antigravity +- Any tool that accepts a system prompt or reads a markdown context file + +The canonical source is always `.context/AGENT.md` (root) and `.context/PROJECT.md` (per-project). +Derived files are committed (see *How context propagates* below) so a `git pull` on any host yields full agent context with no setup. + +## How context propagates + +Canonical sources of truth: +- Universal: `~/dev/.context/AGENT.md` (this file) +- Project: `<repo>/.context/PROJECT.md` (per-repo) + +Derived files (committed, regenerated by `task context:sync`): +- `CLAUDE.md`, `AGENTS.md`, `.cursorrules`, `.aider.conventions.md`, + `.context/system-prompt.txt` + +Workflow: +1. Edit a canonical file. Run `task context:sync`. Commit canonical and + derived together. Push. +2. On any other host, `git pull` brings both. Claude Code (tree-walking) + uses `CLAUDE.md`; Crush / Pi / Antigravity (cwd-only) use `AGENTS.md`; + Cursor uses `.cursorrules`; Aider uses `.aider.conventions.md`. +3. `task check` runs `context:sync` then asserts `git status --porcelain` + is empty over the derived files (catches both modified-tracked drift + and missing-untracked adapters). A drift fails the check with a + message telling you to stage the regenerated files. + +Behavior rules in this file and per-project rules in `PROJECT.md` apply +unconditionally on every host, every harness. + +## Engineering Skills + +Shared engineering skills are available in `~/dev/.skills/`. Load on demand via the index. + +See `~/dev/.skills/SKILLS_INDEX.md` for the full list with descriptions and "use when" triggers. 
+ +Key skills: +- **TDD**: always write tests first — load `tdd` skill +- **Code Review**: load `code-review` skill before any review +- **SOLID/Clean Code**: load `solid` or `clean-code` skill for design work +- **Problem first**: load `problem-analysis` skill before coding non-trivial features + +--- + +# __PROJECT_NAME__ + +## Identity + +- **Name**: __PROJECT_NAME__ +- **Owner**: Mathias +- **Client**: personal +- **Repo**: gitea.d-ma.be/mathias/__PROJECT_NAME__ +- **Status**: active + +## Stack + +Go + ADK + LiteLLM. See `~/dev/.context/AGENT.md` for cross-project conventions. + +## Agent + +TODO: describe what this agent does, what tools it has, and what it's responsible for. + +## Observability + +Traces → Jaeger via `OTLP_ENDPOINT`. Set `ADK_SERVICE_NAME=__PROJECT_NAME__` per deployment. +Spans emitted: `invoke_agent`, `generate_content`. Tool spans require custom callbacks. diff --git a/.context/PROJECT.md b/.context/PROJECT.md new file mode 100644 index 0000000..b69a2a8 --- /dev/null +++ b/.context/PROJECT.md @@ -0,0 +1,22 @@ +# __PROJECT_NAME__ + +## Identity + +- **Name**: __PROJECT_NAME__ +- **Owner**: Mathias +- **Client**: personal +- **Repo**: gitea.d-ma.be/mathias/__PROJECT_NAME__ +- **Status**: active + +## Stack + +Go + ADK + LiteLLM. See `~/dev/.context/AGENT.md` for cross-project conventions. + +## Agent + +TODO: describe what this agent does, what tools it has, and what it's responsible for. + +## Observability + +Traces → Jaeger via `OTLP_ENDPOINT`. Set `ADK_SERVICE_NAME=__PROJECT_NAME__` per deployment. +Spans emitted: `invoke_agent`, `generate_content`. Tool spans require custom callbacks. 
diff --git a/.context/mcp.json b/.context/mcp.json new file mode 100644 index 0000000..c9514c5 --- /dev/null +++ b/.context/mcp.json @@ -0,0 +1,26 @@ +{ + "mcpServers": { + "knowledge": { + "url": "http://localhost:3100/mcp", + "description": "Project knowledge base — vector + graph retrieval" + }, + "brain": { + "type": "http", + "url": "https://brain-mcp.d-ma.be/mcp", + "headers": { + "Authorization": "Bearer ${BRAIN_MCP_TOKEN}" + } + }, + "gitea": { + "type": "http", + "url": "https://git-mcp.d-ma.be/mcp", + "headers": { + "Authorization": "Bearer ${GITEA_MCP_TOKEN}" + } + }, + "infra": { + "type": "http", + "url": "https://infra-mcp.d-ma.be/mcp" + } + } +} diff --git a/.context/system-prompt.txt b/.context/system-prompt.txt new file mode 100644 index 0000000..5dff007 --- /dev/null +++ b/.context/system-prompt.txt @@ -0,0 +1,262 @@ +You are a coding assistant working on a specific project. +Follow all conventions from both the root agent context and project context. + +--- + +# Agent context — Mathias workspace + + + +## Who I am + +I'm Mathias, a digital product manager and technology consultant based in Sweden. +I build software, research emerging tech, and deliver consulting engagements +for clients under NDA. I work across AI/ML, financial automation, web applications, +and climate/sustainability tech. + +## How I work with agents + +- I think like a product manager — I care about *why* before *how* +- I want agents to be opinionated and push back, not just execute blindly +- I prefer concise responses; skip ceremony and get to the point +- When I say "build this", I mean production-quality with tests, not a demo +- Ask me before making irreversible changes or adding heavy dependencies +- I work with confidential client data — never send it to cloud APIs unless I explicitly say it's OK + +## Behavior rules + +These rules apply to every task across every project, regardless of harness. + +1. **No assumptions.** Don't hide confusion — surface it. 
Surface tradeoffs explicitly. + Think before coding; if the problem is unclear, ask or state assumptions before acting. +2. **Minimum viable code.** Solve with the smallest change that works. Nothing + speculative, no "while we're here" cleanups, no premature abstractions. Simplicity first. +3. **Surgical changes.** Touch only what the task requires. Leave unrelated code, + files, and formatting alone. Diffs should be small and reviewable. +4. **Goal-driven execution.** Define clear success criteria up front for every task. + Loop — implement, verify, refine — until those criteria are met. Don't claim + completion without evidence (tests pass, command output, observed behavior). +5. **Trunk-Based Development — commit directly to main.** Every commit is one + logical change (one tool, one fix, one test) with passing tests. Main is always + deployable. Never create long-lived feature branches. + + **Exception — parallel agents on same repo:** If another agent is known to be + actively working on the same repo simultaneously, create a short-lived branch + (`agent/<task>`), finish the task, and merge to main within the same + session. Do not leave agent branches open between sessions. + + **Exception — external contributor or client four-eyes requirement:** Use + PR flow only when a human reviewer outside the project is required. Document + the reason in PROJECT.md. 
+ +## Default stack + +| Layer | Default | Fallback | Last resort | +|-------|---------|----------|-------------| +| Language | Go | Python | TypeScript, Java, C | +| UI | HTMX + Templ | Server-rendered HTML | React (only if SPA is justified) | +| Build | Task (taskfile.dev) | Make | — | +| Containers | Docker Compose (dev), k3s (prod) | — | — | +| DB | PostgreSQL + sqlc | SQLite | — | +| Search | pgvector (vector), BM25 | Qdrant (when >1M vectors or hybrid retrieval) | — | +| Logging | slog (structured) | — | — | +| Testing | Table-driven, testify | — | — | +| Agents (Go) | google.golang.org/adk + pkg/litellm adapter | — | — | + +Exploratory: Rust, Zig — I'll tell you when I want these. + +## Code conventions + +- **Go style**: golines, gofumpt, golangci-lint +- **Errors**: `fmt.Errorf("operation: %w", err)` — never naked, never log-and-return +- **Naming**: stdlib conventions, no stuttering +- **Architecture**: prefer stdlib over frameworks, constructor injection, env-var config parsed into typed structs +- **Git**: conventional commits (`feat:`, `fix:`, `chore:`), commit directly to main, + one logical change per commit, CI is the quality gate +- **Never**: long-lived feature branches, PRs for solo work, direct push without + passing `task check` locally first +- **Security**: no secrets in code, govulncheck before adding deps, SOPS for encrypted config +- **Dependencies**: prefer stdlib. 
testify, slog, templ, sqlc, google.golang.org/adk (agent projects only) are pre-approved; anything else needs justification in the commit message + +## Infrastructure + +Three machines on Tailscale: + +| Machine | Role | Key specs | +|---------|------|-----------| +| koala | GPU inference, heavy compute | RTX 5070, runs k3s + llama-swap + shared postgres18/pgvector | +| iguana | Services, builds | M2 Ultra Mac | +| flamingo | Daily driver, edge | Mac mini, ~/dev is here | + +- **Model routing**: LiteLLM in front of llama-swap (local) + cloud APIs (when permitted) +- **Orchestration**: k3s cluster across all three machines +- **Networking**: Tailscale mesh + +## Project landscape + +All development repos live at `~/dev/` (softlink from `~/Documents/local-dev/`). + +Organized in thematic folders: + +| Folder | Focus | Count | +|--------|-------|-------| +| `GO/` | Go web frameworks, API integrations, learning projects | ~10 | +| `AI/` | ML research, AI frameworks (FinRL, DSPy, crawl4ai) | ~6 | +| `AGENTS/` | Autonomous agents, coding agents, MCP servers, infra | ~15 | +| `QKX/` | Invoice processing, financial automation, payment systems | ~13 | +| `XT/` | Climate data, sustainability (Klimatkollen, Garbo) | ~2 | + +See `~/dev/PROJECT_SUMMARY.md` for detailed descriptions of each project. + +### Key active projects + +- **super-koala** (`AGENTS/`) — multi-component agent stack with LangGraph, DSPy, MCP +- **azure-tiger** (`QKX/`) — invoice extraction → ISO 20022 payment instructions +- **gocrwl** (`AGENTS/`) — Go web crawler with containerized deployment +- **koala-ai-stack** (`AGENTS/`) — local AI server infrastructure management +- **klimatkollen** (`XT/`) — Swedish municipal climate data platform + +## Knowledge base — actively use it + +A persistent brain (BM25 search + LLM-synthesised Q&A) survives across sessions, +hosts, and harnesses. It holds 100+ hard-won entries: infra incident postmortems, +Go pitfalls, framework gotchas, design principles, ADRs. 
**It is not optional +reference material — query it actively, not just when explicitly told.** + +### When to query (treat as a reflex) + +- **Before** starting a non-trivial task — search for prior art with the symptom + AND the system component ("how did we solve X in Y?"). 5 seconds beats 5 hours. +- **When debugging** — search for the error string, the stack frame, the affected + service. Past you may have already paid this tax. +- **Before adopting** a pattern, library, framework, or model name — check if it + was tried and rejected, or what the integration footguns are. +- **When making architectural decisions** — search for the domain + "ADR" or + "decision" to find prior reasoning before re-deriving it. +- **When a recommendation feels novel** — challenge yourself: "has this been + documented?" The brain often has it. + +### When to write + +After you discover something that **future-you would forget** and that **isn't +recoverable from the code, git log, or PR description alone**: + +- Bugs whose root cause is non-obvious and generalisable beyond this project. +- Framework / library / model-name quirks that bit you and would bite anyone. +- Design principles validated under fire (e.g. "every `_get` needs a `_list`"). +- Postmortems for incidents: what broke, why, how diagnosed, what to do next time. + +DON'T write project status, sprint progress, PR summaries, or "what I did this +session" — those rot fast and the originals are in git/gitea anyway. Brain +entries that age well are about *why*, *how to avoid*, and *what to do when*. 
+ +### How to access (per harness) + +| Harness | Query | Write | +|---------|-------|-------| +| **Claude Code, Claude Desktop** | `brain_query` (BM25), `brain_answer` (LLM-synth + sources) MCP tools | `brain_write` MCP tool | +| **Crush, Pi, Antigravity, other MCP-capable** | same MCP server: `ingestion-brain` (via the `mcp__*_brain__*` namespace once authenticated) | same | +| **Anything HTTP-only (curl, scripts)** | `POST https://brain-mcp.d-ma.be/query` with `{"query":"..."}` (auth via `BRAIN_MCP_TOKEN`) | `POST .../write` with `{"content":"...","filename":"..."}` | +| **Browser / human inspection** | `https://gitea.d-ma.be/mathias/hyperguild` → `knowledge/` and `wiki/` markdown files | — | + +- **Scoping**: defaults to `public` collection; client projects filter to `{client}` + `public`. +- **Routing**: brain_answer's LLM uses berget.ai as primary, iguana ollama as + fallback. Both are configurable in the `supervisor/ingestion-deployment.yaml` + on the koala k3s cluster; don't hardcode local-only model names into the + berget URL (see knowledge entry on namespace mismatches). + +### Quick reflex checks + +If you find yourself about to say any of these out loud, you owe yourself a brain query first: + +- "I think the issue might be..." +- "Let me try X and see..." +- "I'll just write a script to..." +- "This is probably a new bug..." +- "Has anyone done this before?" — *yes, probably, go check.* + +## Client work rules + +When working on a project tagged with a client name: +1. Never send code, data, or context to cloud APIs — use local models only +2. Never reference other client projects or their data +3. Keep all artifacts within the client's git org / directory +4. 
Treat everything as confidential unless told otherwise + +## Harness-agnostic principles + +This context is designed to work with any AI coding tool: +- Claude Code, Cursor, Aider, Open WebUI, Charmbracelet Mods/Crush +- Pi Coding Agent, Mistral Vibe, Antigravity +- Any tool that accepts a system prompt or reads a markdown context file + +The canonical source is always `.context/AGENT.md` (root) and `.context/PROJECT.md` (per-project). +Derived files are committed (see *How context propagates* below) so a `git pull` on any host yields full agent context with no setup. + +## How context propagates + +Canonical sources of truth: +- Universal: `~/dev/.context/AGENT.md` (this file) +- Project: `<repo>/.context/PROJECT.md` (per-repo) + +Derived files (committed, regenerated by `task context:sync`): +- `CLAUDE.md`, `AGENTS.md`, `.cursorrules`, `.aider.conventions.md`, + `.context/system-prompt.txt` + +Workflow: +1. Edit a canonical file. Run `task context:sync`. Commit canonical and + derived together. Push. +2. On any other host, `git pull` brings both. Claude Code (tree-walking) + uses `CLAUDE.md`; Crush / Pi / Antigravity (cwd-only) use `AGENTS.md`; + Cursor uses `.cursorrules`; Aider uses `.aider.conventions.md`. +3. `task check` runs `context:sync` then asserts `git status --porcelain` + is empty over the derived files (catches both modified-tracked drift + and missing-untracked adapters). A drift fails the check with a + message telling you to stage the regenerated files. + +Behavior rules in this file and per-project rules in `PROJECT.md` apply +unconditionally on every host, every harness. + +## Engineering Skills + +Shared engineering skills are available in `~/dev/.skills/`. Load on demand via the index. + +See `~/dev/.skills/SKILLS_INDEX.md` for the full list with descriptions and "use when" triggers. 
+ +Key skills: +- **TDD**: always write tests first — load `tdd` skill +- **Code Review**: load `code-review` skill before any review +- **SOLID/Clean Code**: load `solid` or `clean-code` skill for design work +- **Problem first**: load `problem-analysis` skill before coding non-trivial features + +--- + +# __PROJECT_NAME__ + +## Identity + +- **Name**: __PROJECT_NAME__ +- **Owner**: Mathias +- **Client**: personal +- **Repo**: gitea.d-ma.be/mathias/__PROJECT_NAME__ +- **Status**: active + +## Stack + +Go + ADK + LiteLLM. See `~/dev/.context/AGENT.md` for cross-project conventions. + +## Agent + +TODO: describe what this agent does, what tools it has, and what it's responsible for. + +## Observability + +Traces → Jaeger via `OTLP_ENDPOINT`. Set `ADK_SERVICE_NAME=__PROJECT_NAME__` per deployment. +Spans emitted: `invoke_agent`, `generate_content`. Tool spans require custom callbacks. + +--- diff --git a/.cursorrules b/.cursorrules new file mode 100644 index 0000000..ec680d6 --- /dev/null +++ b/.cursorrules @@ -0,0 +1,258 @@ +# Cursor rules — auto-generated +# Do not edit. Run: task context:sync + +# Agent context — Mathias workspace + + + +## Who I am + +I'm Mathias, a digital product manager and technology consultant based in Sweden. +I build software, research emerging tech, and deliver consulting engagements +for clients under NDA. I work across AI/ML, financial automation, web applications, +and climate/sustainability tech. 
+ +## How I work with agents + +- I think like a product manager — I care about *why* before *how* +- I want agents to be opinionated and push back, not just execute blindly +- I prefer concise responses; skip ceremony and get to the point +- When I say "build this", I mean production-quality with tests, not a demo +- Ask me before making irreversible changes or adding heavy dependencies +- I work with confidential client data — never send it to cloud APIs unless I explicitly say it's OK + +## Behavior rules + +These rules apply to every task across every project, regardless of harness. + +1. **No assumptions.** Don't hide confusion — surface it. Surface tradeoffs explicitly. + Think before coding; if the problem is unclear, ask or state assumptions before acting. +2. **Minimum viable code.** Solve with the smallest change that works. Nothing + speculative, no "while we're here" cleanups, no premature abstractions. Simplicity first. +3. **Surgical changes.** Touch only what the task requires. Leave unrelated code, + files, and formatting alone. Diffs should be small and reviewable. +4. **Goal-driven execution.** Define clear success criteria up front for every task. + Loop — implement, verify, refine — until those criteria are met. Don't claim + completion without evidence (tests pass, command output, observed behavior). +5. **Trunk-Based Development — commit directly to main.** Every commit is one + logical change (one tool, one fix, one test) with passing tests. Main is always + deployable. Never create long-lived feature branches. + + **Exception — parallel agents on same repo:** If another agent is known to be + actively working on the same repo simultaneously, create a short-lived branch + (`agent/<task>`), finish the task, and merge to main within the same + session. Do not leave agent branches open between sessions. + + **Exception — external contributor or client four-eyes requirement:** Use + PR flow only when a human reviewer outside the project is required. 
Document + the reason in PROJECT.md. + +## Default stack + +| Layer | Default | Fallback | Last resort | +|-------|---------|----------|-------------| +| Language | Go | Python | TypeScript, Java, C | +| UI | HTMX + Templ | Server-rendered HTML | React (only if SPA is justified) | +| Build | Task (taskfile.dev) | Make | — | +| Containers | Docker Compose (dev), k3s (prod) | — | — | +| DB | PostgreSQL + sqlc | SQLite | — | +| Search | pgvector (vector), BM25 | Qdrant (when >1M vectors or hybrid retrieval) | — | +| Logging | slog (structured) | — | — | +| Testing | Table-driven, testify | — | — | +| Agents (Go) | google.golang.org/adk + pkg/litellm adapter | — | — | + +Exploratory: Rust, Zig — I'll tell you when I want these. + +## Code conventions + +- **Go style**: golines, gofumpt, golangci-lint +- **Errors**: `fmt.Errorf("operation: %w", err)` — never naked, never log-and-return +- **Naming**: stdlib conventions, no stuttering +- **Architecture**: prefer stdlib over frameworks, constructor injection, env-var config parsed into typed structs +- **Git**: conventional commits (`feat:`, `fix:`, `chore:`), commit directly to main, + one logical change per commit, CI is the quality gate +- **Never**: long-lived feature branches, PRs for solo work, direct push without + passing `task check` locally first +- **Security**: no secrets in code, govulncheck before adding deps, SOPS for encrypted config +- **Dependencies**: prefer stdlib. 
testify, slog, templ, sqlc, google.golang.org/adk (agent projects only) are pre-approved; anything else needs justification in the commit message + +## Infrastructure + +Three machines on Tailscale: + +| Machine | Role | Key specs | +|---------|------|-----------| +| koala | GPU inference, heavy compute | RTX 5070, runs k3s + llama-swap + shared postgres18/pgvector | +| iguana | Services, builds | M2 Ultra Mac | +| flamingo | Daily driver, edge | Mac mini, ~/dev is here | + +- **Model routing**: LiteLLM in front of llama-swap (local) + cloud APIs (when permitted) +- **Orchestration**: k3s cluster across all three machines +- **Networking**: Tailscale mesh + +## Project landscape + +All development repos live at `~/dev/` (softlink from `~/Documents/local-dev/`). + +Organized in thematic folders: + +| Folder | Focus | Count | +|--------|-------|-------| +| `GO/` | Go web frameworks, API integrations, learning projects | ~10 | +| `AI/` | ML research, AI frameworks (FinRL, DSPy, crawl4ai) | ~6 | +| `AGENTS/` | Autonomous agents, coding agents, MCP servers, infra | ~15 | +| `QKX/` | Invoice processing, financial automation, payment systems | ~13 | +| `XT/` | Climate data, sustainability (Klimatkollen, Garbo) | ~2 | + +See `~/dev/PROJECT_SUMMARY.md` for detailed descriptions of each project. + +### Key active projects + +- **super-koala** (`AGENTS/`) — multi-component agent stack with LangGraph, DSPy, MCP +- **azure-tiger** (`QKX/`) — invoice extraction → ISO 20022 payment instructions +- **gocrwl** (`AGENTS/`) — Go web crawler with containerized deployment +- **koala-ai-stack** (`AGENTS/`) — local AI server infrastructure management +- **klimatkollen** (`XT/`) — Swedish municipal climate data platform + +## Knowledge base — actively use it + +A persistent brain (BM25 search + LLM-synthesised Q&A) survives across sessions, +hosts, and harnesses. It holds 100+ hard-won entries: infra incident postmortems, +Go pitfalls, framework gotchas, design principles, ADRs. 
**It is not optional +reference material — query it actively, not just when explicitly told.** + +### When to query (treat as a reflex) + +- **Before** starting a non-trivial task — search for prior art with the symptom + AND the system component ("how did we solve X in Y?"). 5 seconds beats 5 hours. +- **When debugging** — search for the error string, the stack frame, the affected + service. Past you may have already paid this tax. +- **Before adopting** a pattern, library, framework, or model name — check if it + was tried and rejected, or what the integration footguns are. +- **When making architectural decisions** — search for the domain + "ADR" or + "decision" to find prior reasoning before re-deriving it. +- **When a recommendation feels novel** — challenge yourself: "has this been + documented?" The brain often has it. + +### When to write + +After you discover something that **future-you would forget** and that **isn't +recoverable from the code, git log, or PR description alone**: + +- Bugs whose root cause is non-obvious and generalisable beyond this project. +- Framework / library / model-name quirks that bit you and would bite anyone. +- Design principles validated under fire (e.g. "every `_get` needs a `_list`"). +- Postmortems for incidents: what broke, why, how diagnosed, what to do next time. + +DON'T write project status, sprint progress, PR summaries, or "what I did this +session" — those rot fast and the originals are in git/gitea anyway. Brain +entries that age well are about *why*, *how to avoid*, and *what to do when*. 
+ +### How to access (per harness) + +| Harness | Query | Write | +|---------|-------|-------| +| **Claude Code, Claude Desktop** | `brain_query` (BM25), `brain_answer` (LLM-synth + sources) MCP tools | `brain_write` MCP tool | +| **Crush, Pi, Antigravity, other MCP-capable** | same MCP server: `ingestion-brain` (via the `mcp__*_brain__*` namespace once authenticated) | same | +| **Anything HTTP-only (curl, scripts)** | `POST https://brain-mcp.d-ma.be/query` with `{"query":"..."}` (auth via `BRAIN_MCP_TOKEN`) | `POST .../write` with `{"content":"...","filename":"..."}` | +| **Browser / human inspection** | `https://gitea.d-ma.be/mathias/hyperguild` → `knowledge/` and `wiki/` markdown files | + +- **Scoping**: defaults to `public` collection; client projects filter to `{client}` + `public`. +- **Routing**: brain_answer's LLM uses berget.ai as primary, iguana ollama as + fallback. Both are configurable in the `supervisor/ingestion-deployment.yaml` + on the koala k3s cluster; don't hardcode local-only model names into the + berget URL (see knowledge entry on namespace mismatches). + +### Quick reflex checks + +If you find yourself about to say any of these out loud, you owe yourself a brain query first: + +- "I think the issue might be..." +- "Let me try X and see..." +- "I'll just write a script to..." +- "This is probably a new bug..." +- "Has anyone done this before?" — *yes, probably, go check.* + +## Client work rules + +When working on a project tagged with a client name: +1. Never send code, data, or context to cloud APIs — use local models only +2. Never reference other client projects or their data +3. Keep all artifacts within the client's git org / directory +4. 
Treat everything as confidential unless told otherwise + +## Harness-agnostic principles + +This context is designed to work with any AI coding tool: +- Claude Code, Cursor, Aider, Open WebUI, Charmbracelet Mods/Crush +- Pi Coding Agent, Mistral Vibe, Antigravity +- Any tool that accepts a system prompt or reads a markdown context file + +The canonical source is always `.context/AGENT.md` (root) and `.context/PROJECT.md` (per-project). +Derived files are committed (see *How context propagates* below) so a `git pull` on any host yields full agent context with no setup. + +## How context propagates + +Canonical sources of truth: +- Universal: `~/dev/.context/AGENT.md` (this file) +- Project: `<repo>/.context/PROJECT.md` (per-repo) + +Derived files (committed, regenerated by `task context:sync`): +- `CLAUDE.md`, `AGENTS.md`, `.cursorrules`, `.aider.conventions.md`, + `.context/system-prompt.txt` + +Workflow: +1. Edit a canonical file. Run `task context:sync`. Commit canonical and + derived together. Push. +2. On any other host, `git pull` brings both. Claude Code (tree-walking) + uses `CLAUDE.md`; Crush / Pi / Antigravity (cwd-only) use `AGENTS.md`; + Cursor uses `.cursorrules`; Aider uses `.aider.conventions.md`. +3. `task check` runs `context:sync` then asserts `git status --porcelain` + is empty over the derived files (catches both modified-tracked drift + and missing-untracked adapters). A drift fails the check with a + message telling you to stage the regenerated files. + +Behavior rules in this file and per-project rules in `PROJECT.md` apply +unconditionally on every host, every harness. + +## Engineering Skills + +Shared engineering skills are available in `~/dev/.skills/`. Load on demand via the index. + +See `~/dev/.skills/SKILLS_INDEX.md` for the full list with descriptions and "use when" triggers.
+ +Key skills: +- **TDD**: always write tests first — load `tdd` skill +- **Code Review**: load `code-review` skill before any review +- **SOLID/Clean Code**: load `solid` or `clean-code` skill for design work +- **Problem first**: load `problem-analysis` skill before coding non-trivial features + +--- + +# __PROJECT_NAME__ + +## Identity + +- **Name**: __PROJECT_NAME__ +- **Owner**: Mathias +- **Client**: personal +- **Repo**: gitea.d-ma.be/mathias/__PROJECT_NAME__ +- **Status**: active + +## Stack + +Go + ADK + LiteLLM. See `~/dev/.context/AGENT.md` for cross-project conventions. + +## Agent + +TODO: describe what this agent does, what tools it has, and what it's responsible for. + +## Observability + +Traces → Jaeger via `OTLP_ENDPOINT`. Set `ADK_SERVICE_NAME=__PROJECT_NAME__` per deployment. +Spans emitted: `invoke_agent`, `generate_content`. Tool spans require custom callbacks. diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..979e079 --- /dev/null +++ b/.env.example @@ -0,0 +1,9 @@ +# LiteLLM / model +LITELLM_API_KEY=your-key-here +LITELLM_BASE_URL=https://llm-api.d-ma.be/v1 +LITELLM_MODEL=berget/llama-3.3-70b + +# Observability (optional — omit to disable tracing) +OTLP_ENDPOINT=http://jaeger.d-ma.be:4318 +ADK_SERVICE_NAME=__PROJECT_NAME__ +ADK_SERVICE_VERSION=0.1.0 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..3878602 --- /dev/null +++ b/.gitignore @@ -0,0 +1,11 @@ +*.exe +*.exe~ +*.dll +*.so +*.dylib +*.test +*.out +go.work +go.work.sum +.env +bin/ diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..42ccd32 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,255 @@ +# Agent context — Mathias workspace + + + +## Who I am + +I'm Mathias, a digital product manager and technology consultant based in Sweden. +I build software, research emerging tech, and deliver consulting engagements +for clients under NDA. 
I work across AI/ML, financial automation, web applications, +and climate/sustainability tech. + +## How I work with agents + +- I think like a product manager — I care about *why* before *how* +- I want agents to be opinionated and push back, not just execute blindly +- I prefer concise responses; skip ceremony and get to the point +- When I say "build this", I mean production-quality with tests, not a demo +- Ask me before making irreversible changes or adding heavy dependencies +- I work with confidential client data — never send it to cloud APIs unless I explicitly say it's OK + +## Behavior rules + +These rules apply to every task across every project, regardless of harness. + +1. **No assumptions.** Don't hide confusion — surface it. Surface tradeoffs explicitly. + Think before coding; if the problem is unclear, ask or state assumptions before acting. +2. **Minimum viable code.** Solve with the smallest change that works. Nothing + speculative, no "while we're here" cleanups, no premature abstractions. Simplicity first. +3. **Surgical changes.** Touch only what the task requires. Leave unrelated code, + files, and formatting alone. Diffs should be small and reviewable. +4. **Goal-driven execution.** Define clear success criteria up front for every task. + Loop — implement, verify, refine — until those criteria are met. Don't claim + completion without evidence (tests pass, command output, observed behavior). +5. **Trunk-Based Development — commit directly to main.** Every commit is one + logical change (one tool, one fix, one test) with passing tests. Main is always + deployable. Never create long-lived feature branches. + + **Exception — parallel agents on same repo:** If another agent is known to be + actively working on the same repo simultaneously, create a short-lived branch + (`agent/<task>`), finish the task, and merge to main within the same + session. Do not leave agent branches open between sessions.
+ + **Exception — external contributor or client four-eyes requirement:** Use + PR flow only when a human reviewer outside the project is required. Document + the reason in PROJECT.md. + +## Default stack + +| Layer | Default | Fallback | Last resort | +|-------|---------|----------|-------------| +| Language | Go | Python | TypeScript, Java, C | +| UI | HTMX + Templ | Server-rendered HTML | React (only if SPA is justified) | +| Build | Task (taskfile.dev) | Make | — | +| Containers | Docker Compose (dev), k3s (prod) | — | — | +| DB | PostgreSQL + sqlc | SQLite | — | +| Search | pgvector (vector), BM25 | Qdrant (when >1M vectors or hybrid retrieval) | — | +| Logging | slog (structured) | — | — | +| Testing | Table-driven, testify | — | — | +| Agents (Go) | google.golang.org/adk + pkg/litellm adapter | — | — | + +Exploratory: Rust, Zig — I'll tell you when I want these. + +## Code conventions + +- **Go style**: golines, gofumpt, golangci-lint +- **Errors**: `fmt.Errorf("operation: %w", err)` — never naked, never log-and-return +- **Naming**: stdlib conventions, no stuttering +- **Architecture**: prefer stdlib over frameworks, constructor injection, env-var config parsed into typed structs +- **Git**: conventional commits (`feat:`, `fix:`, `chore:`), commit directly to main, + one logical change per commit, CI is the quality gate +- **Never**: long-lived feature branches, PRs for solo work, direct push without + passing `task check` locally first +- **Security**: no secrets in code, govulncheck before adding deps, SOPS for encrypted config +- **Dependencies**: prefer stdlib. 
testify, slog, templ, sqlc, google.golang.org/adk (agent projects only) are pre-approved; anything else needs justification in the commit message + +## Infrastructure + +Three machines on Tailscale: + +| Machine | Role | Key specs | +|---------|------|-----------| +| koala | GPU inference, heavy compute | RTX 5070, runs k3s + llama-swap + shared postgres18/pgvector | +| iguana | Services, builds | M2 Ultra Mac | +| flamingo | Daily driver, edge | Mac mini, ~/dev is here | + +- **Model routing**: LiteLLM in front of llama-swap (local) + cloud APIs (when permitted) +- **Orchestration**: k3s cluster across all three machines +- **Networking**: Tailscale mesh + +## Project landscape + +All development repos live at `~/dev/` (softlink from `~/Documents/local-dev/`). + +Organized in thematic folders: + +| Folder | Focus | Count | +|--------|-------|-------| +| `GO/` | Go web frameworks, API integrations, learning projects | ~10 | +| `AI/` | ML research, AI frameworks (FinRL, DSPy, crawl4ai) | ~6 | +| `AGENTS/` | Autonomous agents, coding agents, MCP servers, infra | ~15 | +| `QKX/` | Invoice processing, financial automation, payment systems | ~13 | +| `XT/` | Climate data, sustainability (Klimatkollen, Garbo) | ~2 | + +See `~/dev/PROJECT_SUMMARY.md` for detailed descriptions of each project. + +### Key active projects + +- **super-koala** (`AGENTS/`) — multi-component agent stack with LangGraph, DSPy, MCP +- **azure-tiger** (`QKX/`) — invoice extraction → ISO 20022 payment instructions +- **gocrwl** (`AGENTS/`) — Go web crawler with containerized deployment +- **koala-ai-stack** (`AGENTS/`) — local AI server infrastructure management +- **klimatkollen** (`XT/`) — Swedish municipal climate data platform + +## Knowledge base — actively use it + +A persistent brain (BM25 search + LLM-synthesised Q&A) survives across sessions, +hosts, and harnesses. It holds 100+ hard-won entries: infra incident postmortems, +Go pitfalls, framework gotchas, design principles, ADRs. 
**It is not optional +reference material — query it actively, not just when explicitly told.** + +### When to query (treat as a reflex) + +- **Before** starting a non-trivial task — search for prior art with the symptom + AND the system component ("how did we solve X in Y?"). 5 seconds beats 5 hours. +- **When debugging** — search for the error string, the stack frame, the affected + service. Past you may have already paid this tax. +- **Before adopting** a pattern, library, framework, or model name — check if it + was tried and rejected, or what the integration footguns are. +- **When making architectural decisions** — search for the domain + "ADR" or + "decision" to find prior reasoning before re-deriving it. +- **When a recommendation feels novel** — challenge yourself: "has this been + documented?" The brain often has it. + +### When to write + +After you discover something that **future-you would forget** and that **isn't +recoverable from the code, git log, or PR description alone**: + +- Bugs whose root cause is non-obvious and generalisable beyond this project. +- Framework / library / model-name quirks that bit you and would bite anyone. +- Design principles validated under fire (e.g. "every `_get` needs a `_list`"). +- Postmortems for incidents: what broke, why, how diagnosed, what to do next time. + +DON'T write project status, sprint progress, PR summaries, or "what I did this +session" — those rot fast and the originals are in git/gitea anyway. Brain +entries that age well are about *why*, *how to avoid*, and *what to do when*. 
+ +### How to access (per harness) + +| Harness | Query | Write | +|---------|-------|-------| +| **Claude Code, Claude Desktop** | `brain_query` (BM25), `brain_answer` (LLM-synth + sources) MCP tools | `brain_write` MCP tool | +| **Crush, Pi, Antigravity, other MCP-capable** | same MCP server: `ingestion-brain` (via the `mcp__*_brain__*` namespace once authenticated) | same | +| **Anything HTTP-only (curl, scripts)** | `POST https://brain-mcp.d-ma.be/query` with `{"query":"..."}` (auth via `BRAIN_MCP_TOKEN`) | `POST .../write` with `{"content":"...","filename":"..."}` | +| **Browser / human inspection** | `https://gitea.d-ma.be/mathias/hyperguild` → `knowledge/` and `wiki/` markdown files | + +- **Scoping**: defaults to `public` collection; client projects filter to `{client}` + `public`. +- **Routing**: brain_answer's LLM uses berget.ai as primary, iguana ollama as + fallback. Both are configurable in the `supervisor/ingestion-deployment.yaml` + on the koala k3s cluster; don't hardcode local-only model names into the + berget URL (see knowledge entry on namespace mismatches). + +### Quick reflex checks + +If you find yourself about to say any of these out loud, you owe yourself a brain query first: + +- "I think the issue might be..." +- "Let me try X and see..." +- "I'll just write a script to..." +- "This is probably a new bug..." +- "Has anyone done this before?" — *yes, probably, go check.* + +## Client work rules + +When working on a project tagged with a client name: +1. Never send code, data, or context to cloud APIs — use local models only +2. Never reference other client projects or their data +3. Keep all artifacts within the client's git org / directory +4. 
Treat everything as confidential unless told otherwise + +## Harness-agnostic principles + +This context is designed to work with any AI coding tool: +- Claude Code, Cursor, Aider, Open WebUI, Charmbracelet Mods/Crush +- Pi Coding Agent, Mistral Vibe, Antigravity +- Any tool that accepts a system prompt or reads a markdown context file + +The canonical source is always `.context/AGENT.md` (root) and `.context/PROJECT.md` (per-project). +Derived files are committed (see *How context propagates* below) so a `git pull` on any host yields full agent context with no setup. + +## How context propagates + +Canonical sources of truth: +- Universal: `~/dev/.context/AGENT.md` (this file) +- Project: `<repo>/.context/PROJECT.md` (per-repo) + +Derived files (committed, regenerated by `task context:sync`): +- `CLAUDE.md`, `AGENTS.md`, `.cursorrules`, `.aider.conventions.md`, + `.context/system-prompt.txt` + +Workflow: +1. Edit a canonical file. Run `task context:sync`. Commit canonical and + derived together. Push. +2. On any other host, `git pull` brings both. Claude Code (tree-walking) + uses `CLAUDE.md`; Crush / Pi / Antigravity (cwd-only) use `AGENTS.md`; + Cursor uses `.cursorrules`; Aider uses `.aider.conventions.md`. +3. `task check` runs `context:sync` then asserts `git status --porcelain` + is empty over the derived files (catches both modified-tracked drift + and missing-untracked adapters). A drift fails the check with a + message telling you to stage the regenerated files. + +Behavior rules in this file and per-project rules in `PROJECT.md` apply +unconditionally on every host, every harness. + +## Engineering Skills + +Shared engineering skills are available in `~/dev/.skills/`. Load on demand via the index. + +See `~/dev/.skills/SKILLS_INDEX.md` for the full list with descriptions and "use when" triggers.
+ +Key skills: +- **TDD**: always write tests first — load `tdd` skill +- **Code Review**: load `code-review` skill before any review +- **SOLID/Clean Code**: load `solid` or `clean-code` skill for design work +- **Problem first**: load `problem-analysis` skill before coding non-trivial features + +--- + +# __PROJECT_NAME__ + +## Identity + +- **Name**: __PROJECT_NAME__ +- **Owner**: Mathias +- **Client**: personal +- **Repo**: gitea.d-ma.be/mathias/__PROJECT_NAME__ +- **Status**: active + +## Stack + +Go + ADK + LiteLLM. See `~/dev/.context/AGENT.md` for cross-project conventions. + +## Agent + +TODO: describe what this agent does, what tools it has, and what it's responsible for. + +## Observability + +Traces → Jaeger via `OTLP_ENDPOINT`. Set `ADK_SERVICE_NAME=__PROJECT_NAME__` per deployment. +Spans emitted: `invoke_agent`, `generate_content`. Tool spans require custom callbacks. diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..b69a2a8 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,22 @@ +# __PROJECT_NAME__ + +## Identity + +- **Name**: __PROJECT_NAME__ +- **Owner**: Mathias +- **Client**: personal +- **Repo**: gitea.d-ma.be/mathias/__PROJECT_NAME__ +- **Status**: active + +## Stack + +Go + ADK + LiteLLM. See `~/dev/.context/AGENT.md` for cross-project conventions. + +## Agent + +TODO: describe what this agent does, what tools it has, and what it's responsible for. + +## Observability + +Traces → Jaeger via `OTLP_ENDPOINT`. Set `ADK_SERVICE_NAME=__PROJECT_NAME__` per deployment. +Spans emitted: `invoke_agent`, `generate_content`. Tool spans require custom callbacks. diff --git a/README.md b/README.md new file mode 100644 index 0000000..99ca285 --- /dev/null +++ b/README.md @@ -0,0 +1,25 @@ +# __PROJECT_NAME__ + +Go agent built on [Google ADK](https://google.golang.org/adk) with a LiteLLM adapter for local model routing. 
+ +## Quick start + +```bash +cp .env.example .env +# edit .env with your LITELLM_API_KEY +go mod tidy +task run +``` + +## Observability + +Set `OTLP_ENDPOINT=http://jaeger.d-ma.be:4318` to emit traces. Each invocation produces: +- `invoke_agent __PROJECT_NAME__` span +- `generate_content <model>` child span with `gen_ai.request.model` attribute + +## Structure + +``` +cmd/__PROJECT_NAME__/ agent entrypoint +pkg/litellm/ OpenAI-compat ADK adapter + OTLP telemetry helper +``` diff --git a/Taskfile.yml b/Taskfile.yml new file mode 100644 index 0000000..2767af8 --- /dev/null +++ b/Taskfile.yml @@ -0,0 +1,36 @@ +version: '3' + +# Load .env into every task so `task run` sees LITELLM_API_KEY and friends; +# the agent binary only reads the process environment. +dotenv: ['.env'] + +tasks: + build: + desc: Build the agent binary + cmds: [go build -o bin/__PROJECT_NAME__ ./cmd/__PROJECT_NAME__] + + run: + desc: Run the agent (requires .env) + deps: [build] + cmds: [./bin/__PROJECT_NAME__] + + test: + desc: Run all tests + cmds: [go test ./... -race] + + lint: + cmds: [golangci-lint run ./...] + + check: + desc: Lint, vet, and test (used by CI) + cmds: + - golangci-lint run ./... + - go vet ./... + - go test ./...
-race -count=1 + + context:sync: + desc: Regenerate all harness-specific context files + cmds: + - bash scripts/context-sync.sh + context:sync:claude: + cmds: [bash scripts/context-sync.sh claude] + context:sync:agents: + cmds: [bash scripts/context-sync.sh agents] + context:sync:cursor: + cmds: [bash scripts/context-sync.sh cursor] diff --git a/cmd/__PROJECT_NAME__/main.go b/cmd/__PROJECT_NAME__/main.go new file mode 100644 index 0000000..2c3b921 --- /dev/null +++ b/cmd/__PROJECT_NAME__/main.go @@ -0,0 +1,88 @@ +package main + +import ( + "context" + "fmt" + "os" + + "google.golang.org/adk/agent" + "google.golang.org/adk/agent/llmagent" + "google.golang.org/adk/runner" + "google.golang.org/adk/session" + "google.golang.org/genai" + + "__MODULE_PATH__/pkg/litellm" +) + +// main builds a single LLM agent, sends it one "Hello!" user message and +// prints every non-empty text part of the resulting events to stdout. +// Any failure is reported on stderr and the process exits with status 1. +func main() { + ctx := context.Background() + + // Resolve required configuration first: mustEnv exits the process, and at + // this point nothing has been set up that would need to be torn down. + apiKey := mustEnv("LITELLM_API_KEY") + + shutdown, err := litellm.SetupTelemetry(ctx) + if err != nil { + fmt.Fprintf(os.Stderr, "telemetry: %v\n", err) + os.Exit(1) + } + defer shutdown(ctx) + + // os.Exit does not run deferred calls, so failure paths shut telemetry + // down explicitly; otherwise spans still buffered for export are dropped. + fail := func(stage string, err error) { + fmt.Fprintf(os.Stderr, "%s: %v\n", stage, err) + _ = shutdown(ctx) + os.Exit(1) + } + + llm := litellm.New( + env("LITELLM_MODEL", "berget/llama-3.3-70b"), + env("LITELLM_BASE_URL", "https://llm-api.d-ma.be/v1"), + apiKey, + ) + + ag, err := llmagent.New(llmagent.Config{ + Name: "__PROJECT_NAME__", + Description: "TODO: describe what this agent does", + Model: llm, + Instruction: "You are a helpful assistant.", + }) + if err != nil { + fail("agent", err) + } + + r, err := runner.New(runner.Config{ + AppName: "__PROJECT_NAME__", + Agent: ag, + SessionService: session.InMemoryService(), + AutoCreateSession: true, + }) + if err != nil { + fail("runner", err) + } + + msg := genai.NewContentFromText("Hello!", "user") + events := r.Run(ctx, "user-1", "session-1", msg, agent.RunConfig{}) + + for ev, err := range events { + if err != nil { + fail("run", err) + } + if ev == nil || ev.Content == nil { + continue + } + for _, p := range ev.Content.Parts { + if p != nil
&& p.Text != "" { + fmt.Println(p.Text) + } + } + } +} + +func env(key, fallback string) string { + if v := os.Getenv(key); v != "" { + return v + } + return fallback +} + +func mustEnv(key string) string { + v := os.Getenv(key) + if v == "" { + fmt.Fprintf(os.Stderr, "required env var %s not set\n", key) + os.Exit(1) + } + return v +} diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..b50538b --- /dev/null +++ b/go.mod @@ -0,0 +1,8 @@ +module __MODULE_PATH__ + +go 1.26 + +require ( + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.43.0 + google.golang.org/adk v1.2.0 +) diff --git a/pkg/litellm/model.go b/pkg/litellm/model.go new file mode 100644 index 0000000..6970240 --- /dev/null +++ b/pkg/litellm/model.go @@ -0,0 +1,250 @@ +package litellm + +// Model implements google.golang.org/adk/model.LLM against any +// OpenAI-compatible endpoint (LiteLLM, Ollama, vLLM, etc.). +// +// The official Go ADK v1.x ships only Gemini adapters. This adapter +// implements the official interface directly via net/http — no extra deps. + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "iter" + "net/http" + + adkmodel "google.golang.org/adk/model" + "google.golang.org/genai" +) + +// Model is an ADK-compatible LLM backed by an OpenAI-compatible endpoint. +type Model struct { + name string + baseURL string + apiKey string + client *http.Client +} + +// New creates an OpenAI-compatible ADK model. +// name is the model identifier sent in requests (e.g. "berget/llama-3.3-70b"). +// baseURL is the API base without path (e.g. "https://llm-api.d-ma.be/v1"). 
+func New(name, baseURL, apiKey string) *Model { + return &Model{name: name, baseURL: baseURL, apiKey: apiKey, client: &http.Client{}} +} + +// Name returns the model identifier that is sent in every request. +func (m *Model) Name() string { return m.name } + +// --- OpenAI wire types (minimal subset ADK uses) --- + +type oaiMessage struct { + Role string `json:"role"` + Content string `json:"content,omitempty"` + ToolCallID string `json:"tool_call_id,omitempty"` + ToolCalls []oaiToolCall `json:"tool_calls,omitempty"` +} + +type oaiToolCall struct { + ID string `json:"id"` + Type string `json:"type"` + Function oaiFunctionCall `json:"function"` +} + +type oaiFunctionCall struct { + Name string `json:"name"` + Arguments string `json:"arguments"` +} + +type oaiTool struct { + Type string `json:"type"` + Function oaiFunctionDef `json:"function"` +} + +type oaiFunctionDef struct { + Name string `json:"name"` + Description string `json:"description,omitempty"` + Parameters json.RawMessage `json:"parameters,omitempty"` +} + +type oaiRequest struct { + Model string `json:"model"` + Messages []oaiMessage `json:"messages"` + Tools []oaiTool `json:"tools,omitempty"` +} + +type oaiChoice struct { + Message oaiMessage `json:"message"` + FinishReason string `json:"finish_reason"` +} + +type oaiResponse struct { + Choices []oaiChoice `json:"choices"` + Error *struct { + Message string `json:"message"` + } `json:"error,omitempty"` +} + +// systemMessage returns the request's system instruction as an OpenAI +// "system" message. The text of all non-nil parts is concatenated without a +// separator, the same way contentsToMessages joins text parts. ok is false +// when the request carries no config, no instruction, or only empty text. +func systemMessage(req *adkmodel.LLMRequest) (msg oaiMessage, ok bool) { + if req.Config == nil || req.Config.SystemInstruction == nil { + return oaiMessage{}, false + } + var text bytes.Buffer + for _, p := range req.Config.SystemInstruction.Parts { + if p != nil { + text.WriteString(p.Text) + } + } + if text.Len() == 0 { + return oaiMessage{}, false + } + return oaiMessage{Role: "system", Content: text.String()}, true +} + +// GenerateContent POSTs the conversation to baseURL + "/chat/completions" +// and reports the outcome through the returned iterator. The streaming flag +// is ignored. +func (m *Model) GenerateContent(ctx context.Context, req *adkmodel.LLMRequest, _ bool) iter.Seq2[*adkmodel.LLMResponse, error] { + return func(yield func(*adkmodel.LLMResponse, error) bool) { + msgs := contentsToMessages(req.Contents) + // The system instruction travels in req.Config, not in req.Contents; + // without this it would never reach the endpoint. + if sys, ok := systemMessage(req); ok { + msgs = append([]oaiMessage{sys}, msgs...) + } + tools := adk2oaiTools(req) + + oaiReq := oaiRequest{Model: m.name, Messages: msgs, Tools: tools} + + body, err := json.Marshal(oaiReq) + if err != nil { + yield(nil, fmt.Errorf("marshal: %w", err)) + return + } + + httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, + m.baseURL+"/chat/completions", bytes.NewReader(body)) + if err != nil { + yield(nil, fmt.Errorf("new
request: %w", err)) + return + } + httpReq.Header.Set("Content-Type", "application/json") + httpReq.Header.Set("Authorization", "Bearer "+m.apiKey) + + resp, err := m.client.Do(httpReq) + if err != nil { + yield(nil, fmt.Errorf("http: %w", err)) + return + } + defer resp.Body.Close() + + raw, err := io.ReadAll(resp.Body) + if err != nil { + yield(nil, fmt.Errorf("read body: %w", err)) + return + } + if resp.StatusCode != http.StatusOK { + yield(nil, fmt.Errorf("HTTP %d: %s", resp.StatusCode, string(raw))) + return + } + + var oaiResp oaiResponse + if err := json.Unmarshal(raw, &oaiResp); err != nil { + yield(nil, fmt.Errorf("unmarshal: %w", err)) + return + } + if oaiResp.Error != nil { + yield(nil, fmt.Errorf("api error: %s", oaiResp.Error.Message)) + return + } + if len(oaiResp.Choices) == 0 { + yield(nil, fmt.Errorf("no choices in response")) + return + } + + content := oaiChoiceToContent(oaiResp.Choices[0]) + yield(&adkmodel.LLMResponse{Content: content, TurnComplete: true}, nil) + } +} + +func contentsToMessages(contents []*genai.Content) []oaiMessage { + var msgs []oaiMessage + for _, c := range contents { + if c == nil { + continue + } + var textBuf bytes.Buffer + var toolCalls []oaiToolCall + + for _, p := range c.Parts { + if p == nil { + continue + } + if p.Text != "" { + textBuf.WriteString(p.Text) + } + if p.FunctionCall != nil { + argBytes, _ := json.Marshal(p.FunctionCall.Args) + toolCalls = append(toolCalls, oaiToolCall{ + ID: p.FunctionCall.ID, + Type: "function", + Function: oaiFunctionCall{ + Name: p.FunctionCall.Name, + Arguments: string(argBytes), + }, + }) + } + if p.FunctionResponse != nil { + respBytes, _ := json.Marshal(p.FunctionResponse.Response) + msgs = append(msgs, oaiMessage{ + Role: "tool", + Content: string(respBytes), + ToolCallID: p.FunctionResponse.ID, + }) + } + } + + if len(toolCalls) > 0 || textBuf.Len() > 0 { + msg := oaiMessage{Role: c.Role} + if c.Role == "model" { + msg.Role = "assistant" + } + msg.Content = 
textBuf.String() + msg.ToolCalls = toolCalls + msgs = append(msgs, msg) + } + } + return msgs +} + +// adk2oaiTools converts req.Tools into OpenAI "function" tool definitions. +// Each map value is round-tripped through JSON and only its "description" +// and "parameters" keys are used; the map key becomes the function name. +// Missing or null parameters are replaced by an empty object schema. +// Returns nil when req.Tools is empty. +// NOTE(review): marshal/unmarshal errors are discarded, and Go map iteration +// order is unspecified, so the order of the returned tools can vary per call. +func adk2oaiTools(req *adkmodel.LLMRequest) []oaiTool { + if len(req.Tools) == 0 { + return nil + } + var tools []oaiTool + for name, def := range req.Tools { + raw, _ := json.Marshal(def) + var m map[string]json.RawMessage + _ = json.Unmarshal(raw, &m) + var desc string + if d, ok := m["description"]; ok { + _ = json.Unmarshal(d, &desc) + } + params := m["parameters"] + // Some endpoints (e.g. Berget) reject null parameters for zero-arg tools. + if len(params) == 0 || string(params) == "null" { + params = json.RawMessage(`{"type":"object","properties":{}}`) + } + tools = append(tools, oaiTool{ + Type: "function", + Function: oaiFunctionDef{ + Name: name, + Description: desc, + Parameters: params, + }, + }) + } + return tools +} + +// oaiChoiceToContent converts one OpenAI choice into a genai.Content: +// non-empty message text becomes a text part, every tool call becomes a +// FunctionCall part, and the role "assistant" is renamed to "model". +// NOTE(review): the error from decoding tool-call arguments is discarded. +func oaiChoiceToContent(choice oaiChoice) *genai.Content { + msg := choice.Message + var parts []*genai.Part + + if msg.Content != "" { + parts = append(parts, &genai.Part{Text: msg.Content}) + } + for _, tc := range msg.ToolCalls { + var args map[string]any + _ = json.Unmarshal([]byte(tc.Function.Arguments), &args) + parts = append(parts, &genai.Part{ + FunctionCall: &genai.FunctionCall{ + ID: tc.ID, + Name: tc.Function.Name, + Args: args, + }, + }) + } + + role := msg.Role + if role == "assistant" { + role = "model" + } + return &genai.Content{Role: role, Parts: parts} +} diff --git a/pkg/litellm/telemetry.go b/pkg/litellm/telemetry.go new file mode 100644 index 0000000..0c14a7b --- /dev/null +++ b/pkg/litellm/telemetry.go @@ -0,0 +1,71 @@ +package litellm + +import ( + "context" + "fmt" + "os" + "time" + + "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp" + "go.opentelemetry.io/otel/sdk/resource" + sdktrace "go.opentelemetry.io/otel/sdk/trace" + semconv "go.opentelemetry.io/otel/semconv/v1.36.0" + "google.golang.org/adk/telemetry" +) + +// SetupTelemetry wires ADK OTLP tracing from environment variables.
+// +// Reads: +// - OTLP_ENDPOINT full URL base, e.g. http://jaeger.d-ma.be:4318 (skip if empty) +// - ADK_SERVICE_NAME service name in Jaeger (default: "agent") +// - ADK_SERVICE_VERSION semver label (default: "0.1.0") +// +// Returns a shutdown func to call on exit with a short-timeout context. +// No-op (nil error, noop shutdown) when OTLP_ENDPOINT is unset. +func SetupTelemetry(ctx context.Context) (shutdown func(context.Context) error, err error) { + endpoint := os.Getenv("OTLP_ENDPOINT") + if endpoint == "" { + return func(context.Context) error { return nil }, nil + } + + svcName := os.Getenv("ADK_SERVICE_NAME") + if svcName == "" { + svcName = "agent" + } + svcVersion := os.Getenv("ADK_SERVICE_VERSION") + if svcVersion == "" { + svcVersion = "0.1.0" + } + + exporter, err := otlptracehttp.New(ctx, + otlptracehttp.WithEndpointURL(endpoint+"/v1/traces"), + ) + if err != nil { + return nil, fmt.Errorf("otlp exporter: %w", err) + } + + res, err := resource.New(ctx, + resource.WithAttributes( + semconv.ServiceName(svcName), + semconv.ServiceVersion(svcVersion), + ), + ) + if err != nil { + return nil, fmt.Errorf("resource: %w", err) + } + + providers, err := telemetry.New(ctx, + telemetry.WithSpanProcessors(sdktrace.NewBatchSpanProcessor(exporter)), + telemetry.WithResource(res), + ) + if err != nil { + return nil, fmt.Errorf("telemetry.New: %w", err) + } + providers.SetGlobalOtelProviders() + + return func(ctx context.Context) error { + shutCtx, cancel := context.WithTimeout(ctx, 5*time.Second) + defer cancel() + return providers.Shutdown(shutCtx) + }, nil +} diff --git a/scripts/context-sync.sh b/scripts/context-sync.sh new file mode 100755 index 0000000..4f7300e --- /dev/null +++ b/scripts/context-sync.sh @@ -0,0 +1,201 @@ +#!/usr/bin/env bash +# Generates harness-specific context files from .context/PROJECT.md +# Project-level script — run from a project directory. 
#!/usr/bin/env bash
# Generates harness-specific context files from .context/PROJECT.md
# Project-level script — run from a project directory.
#
# For Claude Code: generates project-only CLAUDE.md (it inherits root via tree walk)
# For everything else: concatenates root AGENT.md + project PROJECT.md
#
# Usage: ./scripts/context-sync.sh [--force] [adapter...]
#   Adapters: claude, agents, cursor, aider, prompt (aliases: system, openwebui,
#   owui, generic), mcp. With no adapter arguments, all of them are generated.
# Task: task context:sync
#
# Override root context: ROOT_CONTEXT=~/dev/.context/AGENT.md ./scripts/context-sync.sh
#
# Requires jq only when a root mcp-servers.json has to be merged.

set -euo pipefail

# Parse --force flag and collect adapter names separately
FORCE=false
ADAPTERS=()
for _arg in "$@"; do
  case "$_arg" in
    --force) FORCE=true ;;
    *) ADAPTERS+=("$_arg") ;;
  esac
done

PROJECT_FILE=".context/PROJECT.md"

# Walk up from the parent of the current directory to find the nearest
# .context/AGENT.md. The current directory itself is never checked, because
# dirname is applied before the first test. Prints the path, or an empty line
# when nothing is found.
find_root_context() {
  local dir
  dir="$(pwd)"
  while [ "$dir" != "/" ]; do
    dir="$(dirname "$dir")"
    if [ -f "$dir/.context/AGENT.md" ]; then
      echo "$dir/.context/AGENT.md"
      return
    fi
  done
  echo ""
}

# An explicitly exported ROOT_CONTEXT wins over auto-discovery.
ROOT_CONTEXT="${ROOT_CONTEXT:-$(find_root_context)}"

if [ ! -f "$PROJECT_FILE" ]; then
  # Errors go to stderr, like every other error path in this script.
  echo "Error: $PROJECT_FILE not found. Are you in a project root?" >&2
  exit 1
fi

# Pre-flight: reject unfilled {{...}} placeholders unless --force
if [ "$FORCE" = false ]; then
  # grep exits non-zero when nothing matches; `|| true` keeps set -e happy.
  _placeholders=$(grep -n '{{[^}]*}}' "$PROJECT_FILE" 2>/dev/null || true)
  if [ -n "$_placeholders" ]; then
    echo "Error: unfilled placeholders in $PROJECT_FILE:" >&2
    while IFS= read -r _match; do
      # Each match has the form "<lineno>:<line content>".
      _lineno="${_match%%:*}"
      _content="${_match#*:}"
      # Report only the first placeholder on each offending line.
      _token=$(printf '%s' "$_content" | grep -o '{{[^}]*}}' | head -1)
      echo " $PROJECT_FILE:$_lineno: unfilled placeholder $_token" >&2
    done <<< "$_placeholders"
    echo "" >&2
    echo "Fill these placeholders, then re-run: task context:sync" >&2
    echo "To bypass validation: bash scripts/context-sync.sh --force" >&2
    exit 1
  fi
fi

if [ -n "$ROOT_CONTEXT" ] && [ -f "$ROOT_CONTEXT" ]; then
  echo " Root context: $ROOT_CONTEXT"
else
  echo " No root AGENT.md found (project context only)"
fi

# Emit root context + separator (emits nothing when there is no root context)
root_block() {
  if [ -n "$ROOT_CONTEXT" ] && [ -f "$ROOT_CONTEXT" ]; then
    cat "$ROOT_CONTEXT"
    echo ""
    echo "---"
    echo ""
  fi
}

# ── Claude Code ──────────────────────────────────────────────
# Claude Code walks up the tree — it finds ~/dev/CLAUDE.md automatically.
# Project-level CLAUDE.md only needs project-specific context.
generate_claude() {
  cat "$PROJECT_FILE" > CLAUDE.md
  echo " → CLAUDE.md (project-only; Claude Code inherits root)"
}

# ── AGENTS.md (Crush, Pi, Antigravity) ──────────────────────
# These tools read AGENTS.md from cwd but don't walk up.
# Concatenate root + project.
generate_agents() {
  { root_block; cat "$PROJECT_FILE"; } > AGENTS.md
  echo " → AGENTS.md (root + project; Crush, Pi, Antigravity)"
}

# ── Cursor ───────────────────────────────────────────────────
generate_cursor() {
  {
    echo "# Cursor rules — auto-generated"
    echo "# Do not edit. Run: task context:sync"
    echo ""
    root_block
    cat "$PROJECT_FILE"
  } > .cursorrules
  echo " → .cursorrules (root + project)"
}

# ── Aider ────────────────────────────────────────────────────
# The conventions file is always regenerated; .aider.conf.yml is only created
# when missing so local edits to it survive a re-sync.
generate_aider() {
  { root_block; cat "$PROJECT_FILE"; } > .aider.conventions.md
  if [ ! -f .aider.conf.yml ]; then
    cat > .aider.conf.yml << 'YAML'
read: .aider.conventions.md
auto-commits: false
YAML
  fi
  echo " → .aider.conventions.md (root + project)"
}

# ── Generic system prompt (Open WebUI, Mods, etc.) ──────────
generate_system_prompt() {
  {
    echo "You are a coding assistant working on a specific project."
    echo "Follow all conventions from both the root agent context and project context."
    echo ""
    echo "---"
    echo ""
    root_block
    cat "$PROJECT_FILE"
    echo ""
    echo "---"
  } > .context/system-prompt.txt
  echo " → .context/system-prompt.txt (root + project)"
}

# ── MCP config ───────────────────────────────────────────────
generate_mcp() {
  # Ensure baseline file exists with project-specific knowledge server
  if [ ! -f .context/mcp.json ]; then
    cat > .context/mcp.json << 'JSON'
{
  "mcpServers": {
    "knowledge": {
      "url": "http://localhost:3100/mcp",
      "description": "Project knowledge base — vector + graph retrieval"
    }
  }
}
JSON
  fi

  # Merge root mcp-servers.json if found alongside root AGENT.md
  local root_mcp=""
  if [ -n "$ROOT_CONTEXT" ] && [ -f "$ROOT_CONTEXT" ]; then
    local candidate
    candidate="$(dirname "$ROOT_CONTEXT")/mcp-servers.json"
    [ -f "$candidate" ] && root_mcp="$candidate"
  fi

  if [ -z "$root_mcp" ]; then
    echo " → .context/mcp.json (exists, no root mcp-servers.json found)"
    return
  fi

  # The merge below is the only part of this script that needs jq; fail with
  # a clear message instead of a bare "command not found".
  if ! command -v jq >/dev/null 2>&1; then
    echo "Error: jq is required to merge $root_mcp into .context/mcp.json" >&2
    return 1
  fi

  # Root servers take precedence over project entries on key conflict
  # (jq's object `+` keeps the right-hand value for duplicate keys).
  local root_servers count updated
  # `// {}` treats a missing or null .servers as "no root servers", so the
  # `keys` call below cannot fail on null.
  root_servers=$(jq '.servers // {}' "$root_mcp")
  count=$(printf '%s' "$root_servers" | jq 'keys | length')
  # The merged document is fully built in a variable before the file is
  # truncated, so a jq failure never leaves mcp.json empty.
  updated=$(jq --argjson root "$root_servers" \
    '.mcpServers = (.mcpServers + $root)' \
    .context/mcp.json)
  printf '%s\n' "$updated" > .context/mcp.json
  echo " → .context/mcp.json (merged $count root servers)"
}

echo "Syncing project context from $PROJECT_FILE..."

if [ ${#ADAPTERS[@]} -eq 0 ]; then
  # No adapters requested: generate everything.
  generate_claude
  generate_agents
  generate_cursor
  generate_aider
  generate_system_prompt
  generate_mcp
else
  for adapter in "${ADAPTERS[@]}"; do
    case "$adapter" in
      claude) generate_claude ;;
      agents) generate_agents ;;
      cursor) generate_cursor ;;
      aider) generate_aider ;;
      prompt|system|openwebui|owui|generic) generate_system_prompt ;;
      mcp) generate_mcp ;;
      *) echo "Unknown adapter: $adapter" >&2; exit 1 ;;
    esac
  done
fi

echo "Done."