From ab937c4e9de7d1d29e2e2df839013c865b851007 Mon Sep 17 00:00:00 2001 From: Mannu Date: Sat, 30 May 2026 22:01:18 +0530 Subject: [PATCH] feat: Telegram alerting + public health probe + Umami visitor digest MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Launch-critical monitoring wiring — alerts go to tiaBaby_Bot via Telegram. - src/lib/alert.ts: sendAlert(level, title, detail?, {fields, silent}) — HTML formatted, IST timestamped, best-effort (never throws). Env: TELEGRAM_BOT_TOKEN, TELEGRAM_CHAT_ID - GET /api/healthz: public, no-auth liveness probe (200 ok / 503 down) for Uptime Kuma + Dokploy healthcheck. No sensitive detail - cron/backup: alert on failure (fatal), warn if dump < 1KB (empty), silent success confirmation with file + size - cron/monitor: error-spike rising-edge detection (last 1h > 5 and > 2x prior hour — stateless, no re-alert on flat rate), DB/migrations/integration checks. ?test=1 sends a Telegram test ping - cron/visitor-summary: polls Umami REST API (login -> stats/metrics/active), posts visitor digest to Telegram. ?hours=N window (default 24) - CLAUDE.md: new env vars + Monitoring & Alerting section Health up/down flip detection is delegated to Uptime Kuma (pings /api/healthz); this code covers what Kuma can't see from outside. 
Co-Authored-By: Claude Opus 4.8 --- CLAUDE.md | 29 ++++- src/app/api/cron/backup/route.ts | 21 +++ src/app/api/cron/monitor/route.ts | 103 +++++++++++++++ src/app/api/cron/visitor-summary/route.ts | 152 ++++++++++++++++++++++ src/app/api/healthz/route.ts | 27 ++++ src/lib/alert.ts | 96 ++++++++++++++ 6 files changed, 427 insertions(+), 1 deletion(-) create mode 100644 src/app/api/cron/monitor/route.ts create mode 100644 src/app/api/cron/visitor-summary/route.ts create mode 100644 src/app/api/healthz/route.ts create mode 100644 src/lib/alert.ts diff --git a/CLAUDE.md b/CLAUDE.md index e979b58..21c1d98 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -237,6 +237,27 @@ Auth pattern: server component layout calls `verifyAdminSession()` → redirects --- +## Monitoring & Alerting + +Alerts go to Telegram (tiaBaby_Bot) via `src/lib/alert.ts` → `sendAlert(level, title, detail?, opts?)`. Best-effort, never throws. + +| Signal | Where | How it fires | +|--------|-------|--------------| +| Site/health up-down | **Uptime Kuma** (external, in Dokploy) | Pings `GET /api/healthz` (public, 200/503). Kuma handles flip detection + recovery | +| Backup fail / empty | `POST /api/cron/backup` | Alerts on exception, and warns if the gzipped dump < 1KB. Success is a silent confirmation | +| Error spikes | `/api/cron/monitor` | Rising-edge: errors in last 1h ≥ 5 **and** > 2× the prior hour. Stateless (no re-alert on flat rate) | +| Internal health | `/api/cron/monitor` | DB unreachable, no migrations, missing integration env | +| Visitor digest | `/api/cron/visitor-summary` | Polls Umami REST API (login → stats/metrics/active), posts digest. `?hours=N` window | + +**Cron endpoints** all require the `x-cron-secret: $CRON_SECRET` header (same as backup). 
Schedule in Dokploy: +- `monitor` — hourly +- `visitor-summary` — daily (or `?hours=1` hourly during launch) +- `backup` — daily (existing) + +**Test the Telegram wiring:** `GET /api/cron/monitor?test=1` with the cron-secret header sends a test ping. + +--- + ## Data Storage Rules | Data Type | Storage | API | @@ -277,7 +298,13 @@ Set in `.env.local` for development, Dokploy dashboard for production. | `RESEND_API_KEY` | ✅ | Resend API key for transactional email | | `EMAIL_FROM` | ✅ | Sender address (e.g. `Tia `) | | `NEXT_PUBLIC_APP_URL` | ✅ | Full app URL (e.g. `https://tia.manohargupta.com`) | -| `CRON_SECRET` | ✅ | Secret for cron backup endpoint | +| `CRON_SECRET` | ✅ | Secret for cron endpoints (backup, monitor, visitor-summary) — sent as `x-cron-secret` header | +| `TELEGRAM_BOT_TOKEN` | ✅ | tiaBaby_Bot token from @BotFather — operational alerts | +| `TELEGRAM_CHAT_ID` | ✅ | Chat/group/channel id alerts post to (see `src/lib/alert.ts` header for how to get it) | +| `UMAMI_BASE_URL` | — | Umami instance (default `https://analytics.manohargupta.com`) | +| `UMAMI_USERNAME` | ✅ | Umami login — for the visitor-summary cron | +| `UMAMI_PASSWORD` | ✅ | Umami password | +| `UMAMI_WEBSITE_ID` | — | Umami website id (default Tia's id) | --- diff --git a/src/app/api/cron/backup/route.ts b/src/app/api/cron/backup/route.ts index 9656d22..636397a 100644 --- a/src/app/api/cron/backup/route.ts +++ b/src/app/api/cron/backup/route.ts @@ -4,6 +4,10 @@ import { promisify } from "util"; import { S3Client, PutObjectCommand, DeleteObjectCommand, ListObjectsV2Command } from "@aws-sdk/client-s3"; import fs from "fs/promises"; import { gzip } from "zlib"; +import { sendAlert } from "@/lib/alert"; + +// Below this, a gzipped dump almost certainly means an empty/failed pg_dump. +const MIN_BACKUP_BYTES = 1024; const execAsync = promisify(exec); const gzipAsync = promisify(gzip); @@ -63,9 +67,26 @@ export async function POST(request: Request) { // 5. 
Cleanup local
     await fs.unlink("/tmp/dump.sql").catch(() => {});
 
+    const sizeKb = Math.round(compressed.length / 1024);
+
+    // Suspiciously small dump — almost certainly an empty or broken backup.
+    if (compressed.length < MIN_BACKUP_BYTES) {
+      await sendAlert("error", "Backup looks empty", `Compressed dump is only ${compressed.length} bytes`, {
+        fields: { File: filename },
+      });
+    } else {
+      await sendAlert("info", "Backup completed", undefined, {
+        fields: { File: filename, Size: `${sizeKb} KB` },
+        silent: true, // daily confirmation — no need to buzz
+      });
+    }
+
     return NextResponse.json({ success: true, filename, size: compressed.length });
   } catch (e) {
     console.error("Backup failed:", e);
+    await sendAlert("fatal", "Database backup FAILED", String(e).slice(0, 500), {
+      fields: { File: filename },
+    });
     return NextResponse.json({ error: String(e) }, { status: 500 });
   }
 }
\ No newline at end of file
diff --git a/src/app/api/cron/monitor/route.ts b/src/app/api/cron/monitor/route.ts
new file mode 100644
index 0000000..f8691c0
--- /dev/null
+++ b/src/app/api/cron/monitor/route.ts
@@ -0,0 +1,123 @@
+import { NextResponse } from "next/server";
+import { sql } from "@/db";
+import { sendAlert } from "@/lib/alert";
+
+/**
+ * Monitor cron — catches the failures Uptime Kuma can't see from the outside:
+ *   • Error spikes (rising-edge: last hour vs the hour before)
+ *   • DB unreachable
+ *   • Migrations missing / integration env not configured
+ *
+ * Uptime Kuma handles up/down flip detection by pinging /api/healthz, so this
+ * focuses on internal signals. Recommended schedule: hourly.
+ *
+ *   POST/GET /api/cron/monitor          (header: x-cron-secret)
+ *   GET      /api/cron/monitor?test=1   — sends a test Telegram ping
+ *
+ * Stateless by design: error alerts use rising-edge comparison so a sustained
+ * (flat) error rate won't re-alert every run — only genuine new spikes do.
+ */
+export const dynamic = "force-dynamic";
+
+const SPIKE_MIN = 5; // need at least this many errors in the last hour
+const SPIKE_MULTIPLIER = 2; // …and > 2× the previous hour to count as a spike
+
+/**
+ * True when the request's `x-cron-secret` header equals CRON_SECRET.
+ * Fails closed: with CRON_SECRET unset or empty nobody is authorised, so an
+ * empty header can never match a blank secret.
+ */
+function authed(request: Request): boolean {
+  const secret = process.env.CRON_SECRET;
+  if (!secret) return false;
+  return request.headers.get("x-cron-secret") === secret;
+}
+
+/** POST entry point: checks the cron secret, then runs the monitor. */
+export async function POST(request: Request) {
+  if (!authed(request)) return NextResponse.json({ error: "Unauthorized" }, { status: 401 });
+  return runMonitor(request);
+}
+
+/** GET entry point: checks the cron secret, then runs the monitor. */
+export async function GET(request: Request) {
+  if (!authed(request)) return NextResponse.json({ error: "Unauthorized" }, { status: 401 });
+  return runMonitor(request);
+}
+
+/**
+ * Runs the checks in order and sends one alert per check that trips.
+ *
+ * A non-empty `test` query parameter short-circuits into a Telegram test ping.
+ * An unreachable database ends the run early; the remaining checks are skipped.
+ *
+ * @returns JSON with `ok`, `dbOk`, the last-hour / prior-hour error counts and
+ *          `fired` (ids of the checks that tripped).
+ */
+async function runMonitor(request: Request) {
+  const { searchParams } = new URL(request.url);
+
+  // Manual test ping — confirms the Telegram wiring end-to-end.
+  if (searchParams.get("test")) {
+    const ok = await sendAlert("info", "Monitor test ping", "Telegram alerting is wired correctly. 🎉");
+    return NextResponse.json({ ok, test: true });
+  }
+
+  const fired: string[] = [];
+
+  // 1. Database reachable?
+  try {
+    await sql`SELECT 1`;
+  } catch (e) {
+    await sendAlert("fatal", "Database unreachable", String(e).slice(0, 300));
+    return NextResponse.json({ ok: false, dbOk: false, fired: ["db_down"] });
+  }
+
+  // 2. Error spike — rising edge (last 1h vs the hour before it)
+  let recent = 0;
+  let prior = 0;
+  try {
+    const rows = await sql`
+      SELECT
+        COUNT(*) FILTER (WHERE created_at > NOW() - INTERVAL '1 hour')::int AS recent,
+        COUNT(*) FILTER (WHERE created_at <= NOW() - INTERVAL '1 hour'
+                           AND created_at > NOW() - INTERVAL '2 hours')::int AS prior
+      FROM error_events
+    `;
+    recent = Number(rows[0]?.recent) || 0;
+    prior = Number(rows[0]?.prior) || 0;
+    if (recent >= SPIKE_MIN && recent > prior * SPIKE_MULTIPLIER) {
+      await sendAlert("error", "Error spike detected", undefined, {
+        fields: { "Last hour": recent, "Previous hour": prior },
+      });
+      fired.push("error_spike");
+    }
+  } catch (e) {
+    // error_events may not exist yet — non-fatal, but log so a broken query is not invisible.
+    console.warn("monitor: error-spike check skipped:", e);
+  }
+
+  // 3. Migrations recorded
+  try {
+    const rows = await sql`SELECT COUNT(*)::int AS count FROM drizzle.__drizzle_migrations`;
+    if ((Number(rows[0]?.count) || 0) === 0) {
+      await sendAlert("warn", "No migrations recorded", "drizzle.__drizzle_migrations is empty", { silent: true });
+      fired.push("no_migrations");
+    }
+  } catch (e) {
+    // Non-fatal: log the failure and carry on with the remaining checks.
+    console.warn("monitor: migrations check skipped:", e);
+  }
+
+  // 4. Integration env presence
+  const missing: string[] = [];
+  if (!(process.env.LITELLM_BASE_URL && process.env.LITELLM_API_KEY)) missing.push("AI Gateway");
+  if (!(process.env.R2_ACCOUNT_ID && process.env.R2_ACCESS_KEY_ID && process.env.R2_BUCKET_NAME)) missing.push("R2 Storage");
+  if (!process.env.RESEND_API_KEY) missing.push("Email");
+  if (missing.length) {
+    await sendAlert("warn", "Integration config missing", missing.join(", "), { silent: true });
+    fired.push("config_missing");
+  }
+
+  return NextResponse.json({ ok: true, dbOk: true, errors: { recent, prior }, fired });
+}
diff --git a/src/app/api/cron/visitor-summary/route.ts b/src/app/api/cron/visitor-summary/route.ts
new file mode 100644
index 0000000..33aaae1
--- /dev/null
+++ b/src/app/api/cron/visitor-summary/route.ts
@@ -0,0 +1,152 @@
+import { NextResponse } from "next/server";
+import { sendAlert } from "@/lib/alert";
+
+/**
+ * Visitor summary cron — polls the self-hosted Umami API and posts a digest to
+ * Telegram. Umami has no native webhook, so we authenticate and read its REST API.
+ *
+ *   POST/GET /api/cron/visitor-summary          (header: x-cron-secret)
+ *   GET      /api/cron/visitor-summary?hours=24 — window (default 24, max 168)
+ *
+ * Env:
+ *   UMAMI_BASE_URL    (default https://analytics.manohargupta.com)
+ *   UMAMI_USERNAME    Umami login (admin or a read-only user)
+ *   UMAMI_PASSWORD
+ *   UMAMI_WEBSITE_ID  (default the Tia website id)
+ *
+ * Recommended schedule: once a day. Set hours=24 for a daily digest, or call it
+ * more often (e.g. hours=1 hourly) if you want tighter pulse during launch.
+ */
+export const dynamic = "force-dynamic";
+
+const UMAMI_BASE = process.env.UMAMI_BASE_URL || "https://analytics.manohargupta.com";
+const WEBSITE_ID = process.env.UMAMI_WEBSITE_ID || "79444c19-ee31-4fab-baf5-f4e61098eeba";
+
+/** True only when CRON_SECRET is set and equals the `x-cron-secret` header (an unset/empty secret fails closed). */
+function authed(request: Request): boolean {
+  const secret = process.env.CRON_SECRET;
+  return !!secret && request.headers.get("x-cron-secret") === secret;
+}
+
+export async function POST(request: Request) {
+  if (!authed(request)) return NextResponse.json({ error: "Unauthorized" }, { status: 401 });
+  return runSummary(request);
+}
+
+export async function GET(request: Request) {
+  if (!authed(request)) return NextResponse.json({ error: "Unauthorized" }, { status: 401 });
+  return runSummary(request);
+}
+
+/** Authenticate with Umami and return a Bearer token, or null on failure. */
+async function umamiLogin(): Promise<string | null> {
+  try {
+    const res = await fetch(`${UMAMI_BASE}/api/auth/login`, {
+      method: "POST",
+      headers: { "Content-Type": "application/json" },
+      body: JSON.stringify({ username: process.env.UMAMI_USERNAME, password: process.env.UMAMI_PASSWORD }),
+    });
+    if (!res.ok) {
+      console.error("umamiLogin failed:", res.status, await res.text().catch(() => ""));
+      return null;
+    }
+    const token = ((await res.json()) as { token?: unknown } | null)?.token;
+    return typeof token === "string" && token ? token : null; // anything else is unusable as a Bearer token
+  } catch (e) {
+    console.error("umamiLogin error:", e);
+    return null;
+  }
+}
+
+/** GET one endpoint under `/api/websites/${WEBSITE_ID}/` and parse its JSON body; throws on a non-2xx status. */
+async function umamiGet(path: string, headers: Record<string, string>): Promise<unknown> {
+  const res = await fetch(`${UMAMI_BASE}/api/websites/${WEBSITE_ID}/${path}`, { headers });
+  if (!res.ok) throw new Error(`HTTP ${res.status}`);
+  return res.json();
+}
+
+// Umami wraps metric values as { value, prev }; tolerate both shapes + raw numbers.
+const num = (v: unknown): number => {
+  if (typeof v === "number") return v;
+  if (v && typeof v === "object" && "value" in v) return Number((v as { value: unknown }).value) || 0;
+  return Number(v) || 0;
+};
+
+/**
+ * Posts a visitor digest for the last `hours` (default 24, clamped to 1–168); a failed Umami read is logged and shown as 0 / "—".
+ */
+async function runSummary(request: Request) {
+  const { searchParams } = new URL(request.url);
+  const hours = Math.max(1, Math.min(168, Number(searchParams.get("hours")) || 24));
+  const endAt = Date.now();
+  const startAt = endAt - hours * 3_600_000;
+
+  if (!process.env.UMAMI_USERNAME || !process.env.UMAMI_PASSWORD) {
+    return NextResponse.json({ error: "UMAMI_USERNAME / UMAMI_PASSWORD not set" }, { status: 500 });
+  }
+
+  const token = await umamiLogin();
+  if (!token) {
+    await sendAlert("warn", "Visitor summary unavailable", "Could not authenticate with Umami", { silent: true });
+    return NextResponse.json({ error: "umami auth failed" }, { status: 502 });
+  }
+
+  const headers = { Authorization: `Bearer ${token}` };
+  const qs = `startAt=${startAt}&endAt=${endAt}`;
+
+  // Stats
+  let stats: Record<string, unknown> = {};
+  try {
+    const j = await umamiGet(`stats?${qs}`, headers);
+    if (j && typeof j === "object") stats = j as Record<string, unknown>;
+  } catch (e) {
+    console.error("umami stats error:", e);
+  }
+
+  // Top pages
+  let topPages: Array<{ x?: string; url?: string; y?: number; value?: number }> = [];
+  try {
+    const j = await umamiGet(`metrics?type=url&${qs}&limit=5`, headers);
+    if (Array.isArray(j)) topPages = j;
+  } catch (e) {
+    console.error("umami metrics error:", e);
+  }
+
+  // Active right now
+  let active = 0;
+  try {
+    const a = await umamiGet("active", headers);
+    active = typeof a === "number" ? a : num((a as { visitors?: unknown; x?: unknown })?.visitors ?? (a as { x?: unknown })?.x);
+  } catch (e) {
+    console.error("umami active error:", e);
+  }
+
+  const pageviews = num(stats.pageviews);
+  const visitors = num(stats.visitors);
+  const visits = num(stats.visits);
+  const bounces = num(stats.bounces);
+  const totaltime = num(stats.totaltime);
+
+  const bounceRate = visits ? Math.round((bounces / visits) * 100) : 0;
+  const avgVisit = visits ? Math.round(totaltime / visits) : 0; // seconds
+
+  const pageRows = topPages.slice(0, 5).map(p => `• ${p.x || p.url || "?"} — ${p.y ?? p.value ?? 0}`);
+  const pagesText = pageRows.join("\n") || "—";
+
+  const label = hours === 24 ? "Last 24h" : `Last ${hours}h`;
+  const body = [
+    `👥 Visitors: ${visitors}`,
+    `🔁 Visits: ${visits}`,
+    `📄 Page views: ${pageviews}`,
+    `↩️ Bounce rate: ${bounceRate}%`,
+    `⏱ Avg visit: ${avgVisit}s`,
+    `🟢 Active now: ${active}`,
+    ``,
+    `Top pages:`,
+    pagesText,
+  ].join("\n");
+
+  const delivered = await sendAlert("info", `📊 Visitor summary — ${label}`, body, { silent: true });
+
+  return NextResponse.json({ ok: true, delivered, summary: { visitors, visits, pageviews, bounceRate, avgVisit, active } });
+}
diff --git a/src/app/api/healthz/route.ts b/src/app/api/healthz/route.ts
new file mode 100644
index 0000000..7da60fd
--- /dev/null
+++ b/src/app/api/healthz/route.ts
@@ -0,0 +1,27 @@
+import { NextResponse } from "next/server";
+import { sql } from "@/db";
+
+/**
+ * GET /api/healthz — public, unauthenticated liveness probe.
+ *
+ * For external monitors (Uptime Kuma) and the Docker / Dokploy healthcheck.
+ * Returns NO sensitive detail — only whether the app can reach the database.
+ *   200 { status: "ok",   db: true }
+ *   503 { status: "down", db: false }
+ */
+export const dynamic = "force-dynamic";
+
+export async function GET() {
+  try {
+    await sql`SELECT 1`;
+    return NextResponse.json(
+      { status: "ok", db: true, ts: new Date().toISOString() },
+      { headers: { "Cache-Control": "no-store" } },
+    );
+  } catch {
+    return NextResponse.json(
+      { status: "down", db: false, ts: new Date().toISOString() },
+      { status: 503, headers: { "Cache-Control": "no-store" } },
+    );
+  }
+}
diff --git a/src/lib/alert.ts b/src/lib/alert.ts
new file mode 100644
index 0000000..d708cc2
--- /dev/null
+++ b/src/lib/alert.ts
@@ -0,0 +1,109 @@
+/**
+ * alert.ts — operational alerts to Telegram (tiaBaby_Bot).
+ *
+ * Best-effort: every function swallows its own errors so a failed alert can
+ * never cascade into another failure (mirrors logError / logAudit).
+ *
+ * Env:
+ *   TELEGRAM_BOT_TOKEN  — from @BotFather
+ *   TELEGRAM_CHAT_ID    — chat / channel / group id to post into
+ *
+ * Get your chat id: send any message to the bot, then
+ *   curl "https://api.telegram.org/bot<TOKEN>/getUpdates"
+ * and read result[].message.chat.id (negative for groups/channels)
+ */
+
+export type AlertLevel = "info" | "warn" | "error" | "fatal";
+
+const EMOJI: Record<AlertLevel, string> = {
+  info: "🔵",
+  warn: "🟡",
+  error: "🔴",
+  fatal: "🚨",
+};
+
+/** Upper bound for one Telegram request, so a stalled call cannot hang the caller. */
+const SEND_TIMEOUT_MS = 10_000;
+
+/** Escape the three characters Telegram HTML parse-mode treats specially. */
+function escapeHtml(s: string): string {
+  // "&" goes first; otherwise the "&" of a freshly written "&lt;" / "&gt;" would be escaped again.
+  return s.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;");
+}
+
+/** Current time in Asia/Kolkata, formatted with the en-IN locale (day, short month, 12-hour clock). */
+function istNow(): string {
+  return new Date().toLocaleString("en-IN", {
+    timeZone: "Asia/Kolkata",
+    day: "numeric",
+    month: "short",
+    hour: "2-digit",
+    minute: "2-digit",
+    hour12: true,
+  });
+}
+
+export interface AlertOptions {
+  /** Key/value rows rendered (bold key) beneath the message. */
+  fields?: Record<string, string | number>;
+  /** Send without a notification sound. */
+  silent?: boolean;
+}
+
+/**
+ * Send an operational alert to Telegram. Never throws.
+ *
+ * @param level  Severity; selects the leading emoji.
+ * @param title  Headline, HTML-escaped before sending.
+ * @param detail Optional body text, cut to 3500 characters and HTML-escaped.
+ * @param opts   Optional `fields` rows and `silent` flag.
+ * @returns true if delivered, false if skipped or failed.
+ */
+export async function sendAlert(
+  level: AlertLevel,
+  title: string,
+  detail?: string,
+  opts: AlertOptions = {},
+): Promise<boolean> {
+  const token = process.env.TELEGRAM_BOT_TOKEN;
+  const chatId = process.env.TELEGRAM_CHAT_ID;
+  if (!token || !chatId) {
+    console.warn("sendAlert: TELEGRAM_BOT_TOKEN / TELEGRAM_CHAT_ID not set — skipping");
+    return false;
+  }
+
+  const lines: string[] = [`${EMOJI[level]} ${escapeHtml(title)}`];
+  if (detail) lines.push("", escapeHtml(detail.slice(0, 3500)));
+  if (opts.fields && Object.keys(opts.fields).length) {
+    lines.push("");
+    for (const [k, v] of Object.entries(opts.fields)) {
+      // <b> is the only literal markup; key and value are escaped.
+      lines.push(`<b>${escapeHtml(k)}:</b> ${escapeHtml(String(v))}`);
+    }
+  }
+  lines.push("", `🌸 Tia · ${istNow()} IST`);
+
+  try {
+    const res = await fetch(`https://api.telegram.org/bot${token}/sendMessage`, {
+      method: "POST",
+      headers: { "Content-Type": "application/json" },
+      body: JSON.stringify({
+        chat_id: chatId,
+        text: lines.join("\n"),
+        parse_mode: "HTML",
+        disable_web_page_preview: true,
+        disable_notification: !!opts.silent,
+      }),
+      // A timeout rejects the fetch, which the catch below turns into `false`.
+      signal: AbortSignal.timeout(SEND_TIMEOUT_MS),
+    });
+    if (!res.ok) {
+      console.error("sendAlert: Telegram API error", res.status, await res.text().catch(() => ""));
+      return false;
+    }
+    return true;
+  } catch (e) {
+    console.error("sendAlert failed:", e);
+    return false;
+  }
+}