feat(bridge): real sub-agent spawning (replaces spawn.ts placeholder)

- lib/agents.ts: canonical specialist registry (cody/ethan/cathy/elon),
  legacy alias normalization (coder/researcher/writer/pm), personas,
  documented upgrade path to true per-agent OpenClaw config
- spawn.ts: executes isolated OpenClaw sessions via docker exec with
  temp-file message transport, tracks runs in the executions table,
  serializes turns (MAX_CONCURRENT=1, RAM-constrained host), reports
  completion to Telegram via /tiger/notify
- new: GET /runs, GET /runs/:id for dashboard status
This commit is contained in:
Manohar 2026-06-10 02:46:48 +00:00
parent 61e386f7fe
commit 1a8358bb6a
2 changed files with 430 additions and 42 deletions

132
bridge/src/lib/agents.ts Normal file
View file

@ -0,0 +1,132 @@
/**
* lib/agents.ts Sub-agent registry (single source of truth)
*
* Why this file exists:
* Agent identity was previously scattered across spawn.ts, agents-activity.ts,
* dispatch.ts and the dashboard, with TWO competing id schemes:
* - classifier ids: cody / ethan / cathy / elon (lib/llm.ts AGENT_IDS)
* - legacy UI ids: coder / researcher / writer / pm
* This registry canonicalizes on the classifier ids and maps legacy
* aliases onto them, so every layer can call normalizeAgentId() and agree.
*
* Personas:
* Sub-agents currently run as isolated *sessions* of the `main` OpenClaw
* agent (one shared workspace, separate conversation histories). The
* persona block below is prepended to the task message, acting as the
* specialist's system prompt for that session.
*
* Upgrade path (documented, not yet taken): define real per-agent entries
* in openclaw.json `agents.list`, each with its own IDENTITY.md and
* workspace, then change ONE line in spawn.ts the `--agent` flag.
*/
export type SpecialistId = "cody" | "ethan" | "cathy" | "elon";
export interface SpecialistAgent {
id: SpecialistId;
/** Display name used across the dashboard and Telegram reports. */
name: string;
/** Short role label for UI chips. */
role: string;
/** Legacy ids that must keep working (old UI, old API callers). */
aliases: string[];
/** Persona preamble injected at the top of every spawned session. */
persona: string;
}
export const SPECIALISTS: Record<SpecialistId, SpecialistAgent> = {
cody: {
id: "cody",
name: "Cody",
role: "Code",
aliases: ["coder"],
persona: [
"You are Cody, Tiger's software engineering specialist.",
"Scope: code, debugging, devops, deployments, scripts, infra, build systems.",
"Style: read existing code before changing it; smallest correct diff;",
"state assumptions explicitly; never run destructive commands without flagging.",
].join(" "),
},
ethan: {
id: "ethan",
name: "Ethan",
role: "Research",
aliases: ["researcher"],
persona: [
"You are Ethan, Tiger's research specialist.",
"Scope: market research, policy analysis, technical investigation, due diligence.",
"Style: cite sources, separate facts from inference, quantify with units,",
"end with a short actionable summary.",
].join(" "),
},
cathy: {
id: "cathy",
name: "Cathy",
role: "Write",
aliases: ["writer"],
persona: [
"You are Cathy, Tiger's writing specialist.",
"Scope: documents, summaries, reports, communication drafts.",
"Style: clear structure, no filler, match the register the task asks for.",
].join(" "),
},
elon: {
id: "elon",
name: "Elon",
role: "PM",
aliases: ["pm"],
persona: [
"You are Elon, Tiger's project management specialist.",
"Scope: planning, prioritization, breaking work into tasks, status synthesis.",
"Style: concrete next actions with owners and order; surface blockers first.",
].join(" "),
},
};
/** All ids + aliases that POST /tiger/spawn accepts. */
export const ACCEPTED_AGENT_IDS: string[] = Object.values(SPECIALISTS).flatMap(
(a) => [a.id, ...a.aliases],
);
/**
* Map any accepted id/alias ("coder", "cody", "CODY") to its canonical
* specialist, or null if unknown. "tiger"/"main" are deliberately NOT
* spawnable Tiger is the orchestrator, not a sub-agent.
*/
export function normalizeAgentId(raw: string): SpecialistAgent | null {
const id = (raw || "").trim().toLowerCase();
for (const agent of Object.values(SPECIALISTS)) {
if (agent.id === id || agent.aliases.includes(id)) return agent;
}
return null;
}
/**
* Build the message a spawned session receives: persona + task + optional
* context + reporting contract. The reporting contract matters the spawn
* runner parses the final reply and relays it to Telegram, so we ask for a
* result the human can read in one glance.
*/
export function buildSpawnPrompt(
agent: SpecialistAgent,
task: string,
context?: string,
): string {
const parts = [
`[SUB-AGENT SESSION — ${agent.name} (${agent.role})]`,
agent.persona,
"",
"TASK:",
task.trim(),
];
if (context && context.trim()) {
parts.push("", "CONTEXT:", context.trim());
}
parts.push(
"",
"When finished, end your reply with a line starting with 'RESULT:' " +
"summarizing the outcome in 1-3 sentences. If you could not complete " +
"the task, start that line with 'BLOCKED:' and say what you need.",
);
return parts.join("\n");
}

View file

@ -1,66 +1,322 @@
/**
* spawn.ts POST /tiger/spawn
* spawn.ts POST /tiger/spawn : REAL sub-agent execution
*
* Trigger spawning of sub-agents. This is a placeholder -
* real implementation requires sub-agent permission config.
* Replaces the long-standing placeholder. A spawn is an isolated OpenClaw
* session of the `main` agent running with a specialist persona prepended
* (see lib/agents.ts for the registry and the per-agent upgrade path).
*
* POST /tiger/spawn
* { agentId: "coder" | "researcher" | "writer" | "pm", task: "..." }
* Flow per spawn:
* 1. validate + normalize agent id (accepts cody/ethan/cathy/elon + legacy aliases)
* 2. insert a row into `executions` (status = running while exit_code IS NULL)
* 3. enqueue at most MAX_CONCURRENT sessions run at once. The VPS is
* memory-constrained; parallel agent turns push it into swap and every
* turn times out. Serializing is a feature, not a limitation.
* 4. run `openclaw agent --session-id spawn-<agent>-<n> ... --json` inside
* the tiger-openclaw container. The message travels via docker cp of a
* temp file same battle-tested pattern as lib/telegram.ts, immune to
* shell-escaping bugs from quotes/backticks/JSON in task text.
* 5. parse the reply, complete the executions row, fire a Telegram
* notification through the bridge's own /tiger/notify route.
*
* Response:
* { ok: true, sessionId, status: "spawned" | "pending" }
* Routes:
* POST /tiger/spawn { agentId, task, context?, taskId? }
* GET /tiger/spawn/runs recent spawn runs (+ live queue state)
* GET /tiger/spawn/runs/:id one run with full output
* GET /tiger/spawn/agents the specialist registry
*/
import { Router, Request, Response } from "express";
import { execInSandbox } from "../tiger.js";
import { exec } from "child_process";
import { promisify } from "util";
import { writeFileSync, unlinkSync } from "fs";
import { randomUUID } from "crypto";
import db, { generateId } from "../db.js";
import {
SPECIALISTS,
ACCEPTED_AGENT_IDS,
normalizeAgentId,
buildSpawnPrompt,
type SpecialistAgent,
} from "../lib/agents.js";
const execAsync = promisify(exec);
const router = Router();
const validAgents = ["coder", "researcher", "writer", "pm"];
const DOCKER_CONTAINER = "tiger-openclaw";
/** One agent turn at a time see header comment about RAM. Raise after the
* server is upgraded / the homelab is evicted. */
const MAX_CONCURRENT = 1;
/** Keep below the 300s cron budget so cron-triggered spawns can't be the
* thing that blows the cron's own timeout. */
const SPAWN_TIMEOUT_SECONDS = 240;
router.post("/", async (req: Request, res: Response) => {
const { agentId, task } = req.body;
if (!agentId || !validAgents.includes(agentId)) {
return res.status(400).json({
ok: false,
error: `Invalid agent. Use: ${validAgents.join(", ")}`
const BRIDGE_SELF_URL = process.env.TIGER_BRIDGE_SELF_URL || "http://127.0.0.1:3456";
const BRIDGE_TOKEN = process.env.TIGER_BRIDGE_TOKEN || "";
// ─── Run bookkeeping ─────────────────────────────────────────────────────────
interface SpawnRequest {
runId: string;
agent: SpecialistAgent;
task: string;
context?: string;
sessionId: string;
}
interface SpawnOutcome {
ok: boolean;
reply: string;
error?: string;
}
let activeCount = 0;
const queue: Array<() => Promise<void>> = [];
function pump(): void {
while (activeCount < MAX_CONCURRENT && queue.length > 0) {
const job = queue.shift();
if (!job) break;
activeCount += 1;
void job().finally(() => {
activeCount -= 1;
pump();
});
}
if (!task) {
return res.status(400).json({ ok: false, error: "task is required" });
}
// ─── Core runner (exported so lib/inbox.ts can spawn without HTTP) ──────────
export interface SpawnTicket {
runId: string;
sessionId: string;
agent: { id: string; name: string };
queued: number;
}
export function spawnTask(input: {
agentId: string;
task: string;
context?: string;
taskId?: string;
}): SpawnTicket {
const agent = normalizeAgentId(input.agentId);
if (!agent) {
throw new Error(
`Unknown agent '${input.agentId}'. Accepted: ${ACCEPTED_AGENT_IDS.join(", ")}`,
);
}
const task = (input.task || "").trim();
if (!task) throw new Error("task is required");
const runId = generateId("exec");
const sessionId = `spawn-${agent.id}-${randomUUID().slice(0, 8)}`;
// exit_code NULL = still running; completed_at NULL until the turn ends.
db.prepare(
`INSERT INTO executions (id, task_id, agent, command)
VALUES (?, ?, ?, ?)`,
).run(runId, input.taskId ?? null, agent.id, `spawn: ${task.slice(0, 300)}`);
const req: SpawnRequest = { runId, agent, task, context: input.context, sessionId };
queue.push(() => executeSpawn(req));
pump();
return {
runId,
sessionId,
agent: { id: agent.id, name: agent.name },
queued: queue.length,
};
}
async function executeSpawn(req: SpawnRequest): Promise<void> {
const { runId, agent, task, context, sessionId } = req;
const prompt = buildSpawnPrompt(agent, task, context);
const tmpFile = `/tmp/spawn_${runId}.txt`;
let outcome: SpawnOutcome;
try {
// Note: Sub-agent spawning requires config
// This is a placeholder - returns info about what's needed
res.json({
ok: true,
agentId,
task,
status: "pending",
message: "Sub-agent spawning requires config. Set agents.defaults.subagents in openclaw.json"
// Stage the message inside the container (escaping-proof transport).
writeFileSync(tmpFile, prompt, "utf-8");
await execAsync(`docker cp ${tmpFile} ${DOCKER_CONTAINER}:${tmpFile}`, {
timeout: 10_000,
});
} catch (err: any) {
res.status(500).json({ ok: false, error: err.message });
unlinkSync(tmpFile);
const cmd =
`docker exec ${DOCKER_CONTAINER} sh -c '` +
`MSG=$(cat ${tmpFile}); rm -f ${tmpFile}; ` +
`openclaw agent --session-id ${sessionId} -m "$MSG" --json ` +
`--timeout ${SPAWN_TIMEOUT_SECONDS}'`;
const { stdout } = await execAsync(cmd, {
timeout: (SPAWN_TIMEOUT_SECONDS + 30) * 1000,
maxBuffer: 10 * 1024 * 1024,
});
outcome = { ok: true, reply: extractReply(stdout) };
} catch (err) {
const message = err instanceof Error ? err.message : String(err);
console.error(`[spawn] ${runId} (${agent.id}) failed:`, message);
outcome = { ok: false, reply: "", error: message };
try { unlinkSync(tmpFile); } catch { /* already gone */ }
}
db.prepare(
`UPDATE executions
SET stdout = ?, stderr = ?, exit_code = ?, completed_at = datetime('now')
WHERE id = ?`,
).run(outcome.reply, outcome.error ?? "", outcome.ok ? 0 : 1, runId);
await notifyCompletion(req, outcome);
}
/** Pull the text reply out of `openclaw agent --json` output. */
function extractReply(stdout: string): string {
let parsed: unknown;
try {
parsed = JSON.parse(stdout);
} catch {
return stdout.trim();
}
const p = parsed as Record<string, any>;
return (
p?.result?.payloads?.[0]?.text ||
p?.payloads?.[0]?.text ||
p?.summary ||
p?.text ||
p?.output ||
stdout.trim()
);
}
/** Report the outcome to Telegram via the bridge's own notify route. */
async function notifyCompletion(req: SpawnRequest, outcome: SpawnOutcome): Promise<void> {
const { agent, task, runId } = req;
const resultLine =
outcome.reply
.split("\n")
.reverse()
.find((l) => l.startsWith("RESULT:") || l.startsWith("BLOCKED:")) ??
outcome.reply.slice(-300);
const message = outcome.ok
? `🤖 *${agent.name}* finished: ${task.slice(0, 120)}\n\n${resultLine.slice(0, 800)}\n\n_run ${runId}_`
: `⚠️ *${agent.name}* failed: ${task.slice(0, 120)}\n\n${(outcome.error ?? "unknown error").slice(0, 300)}\n\n_run ${runId}_`;
try {
await fetch(`${BRIDGE_SELF_URL}/tiger/notify`, {
method: "POST",
headers: {
"Content-Type": "application/json",
Authorization: `Bearer ${BRIDGE_TOKEN}`,
},
body: JSON.stringify({ message }),
});
} catch (err) {
// Notification failure must never mark the run failed — log and move on.
const m = err instanceof Error ? err.message : String(err);
console.error(`[spawn] notify failed for ${runId}:`, m);
}
}
// ─── HTTP surface ────────────────────────────────────────────────────────────
interface ExecutionRow {
id: string;
task_id: string | null;
agent: string | null;
command: string | null;
stdout: string;
stderr: string;
exit_code: number | null;
started_at: string;
completed_at: string | null;
}
function rowStatus(row: ExecutionRow): "running" | "done" | "error" {
if (row.exit_code === null) return "running";
return row.exit_code === 0 ? "done" : "error";
}
router.post("/", (req: Request, res: Response) => {
const { agentId, task, context, taskId } = req.body as {
agentId?: string;
task?: string;
context?: string;
taskId?: string;
};
try {
const ticket = spawnTask({
agentId: agentId ?? "",
task: task ?? "",
context,
taskId,
});
res.json({ ok: true, status: "spawned", ...ticket });
} catch (err) {
const message = err instanceof Error ? err.message : String(err);
res.status(400).json({ ok: false, error: message });
}
});
// GET available agents
router.get("/agents", (_req: Request, res: Response) => {
router.get("/runs", (_req: Request, res: Response) => {
const rows = db
.prepare(
`SELECT id, task_id, agent, command, exit_code, started_at, completed_at
FROM executions
WHERE command LIKE 'spawn:%'
ORDER BY started_at DESC
LIMIT 50`,
)
.all() as ExecutionRow[];
res.json({
ok: true,
agents: validAgents.map(id => ({
id,
name: id === "coder" ? "Cody" :
id === "researcher" ? "Ethan" :
id === "writer" ? "Cathy" : "Elon",
role: id === "coder" ? "Code" :
id === "researcher" ? "Research" :
id === "writer" ? "Write" : "PM"
}))
active: activeCount,
queued: queue.length,
runs: rows.map((r) => ({
runId: r.id,
agent: r.agent,
task: (r.command ?? "").replace(/^spawn:\s*/, ""),
status: rowStatus(r),
startedAt: r.started_at,
completedAt: r.completed_at,
})),
});
});
export default router
router.get("/runs/:id", (req: Request, res: Response) => {
const row = db
.prepare(`SELECT * FROM executions WHERE id = ?`)
.get(req.params.id) as ExecutionRow | undefined;
if (!row) return res.status(404).json({ ok: false, error: "run not found" });
res.json({
ok: true,
run: {
runId: row.id,
agent: row.agent,
task: (row.command ?? "").replace(/^spawn:\s*/, ""),
status: rowStatus(row),
reply: row.stdout,
error: row.stderr,
startedAt: row.started_at,
completedAt: row.completed_at,
},
});
});
router.get("/agents", (_req: Request, res: Response) => {
res.json({
ok: true,
agents: Object.values(SPECIALISTS).map((a) => ({
id: a.id,
name: a.name,
role: a.role,
aliases: a.aliases,
})),
});
});
export default router;