feat: Telegram alerting + public health probe + Umami visitor digest

Launch-critical monitoring wiring — alerts go to tiaBaby_Bot via Telegram.

- src/lib/alert.ts: sendAlert(level, title, detail?, {fields, silent}) — HTML
  formatted, IST timestamped, best-effort (never throws). Env: TELEGRAM_BOT_TOKEN,
  TELEGRAM_CHAT_ID
- GET /api/healthz: public, no-auth liveness probe (200 ok / 503 down) for
  Uptime Kuma + Dokploy healthcheck. No sensitive detail
- cron/backup: alert on failure (fatal), warn if dump < 1KB (empty), silent
  success confirmation with file + size
- cron/monitor: error-spike rising-edge detection (last 1h >= 5 and > 2x prior
  hour — stateless, no re-alert on flat rate), DB/migrations/integration checks.
  ?test=1 sends a Telegram test ping
- cron/visitor-summary: polls Umami REST API (login -> stats/metrics/active),
  posts visitor digest to Telegram. ?hours=N window (default 24)
- CLAUDE.md: new env vars + Monitoring & Alerting section

Health up/down flip detection is delegated to Uptime Kuma (pings /api/healthz);
this code covers what Kuma can't see from outside.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Manohar Gupta 2026-05-30 22:01:18 +05:30
parent a89ab96a12
commit ab937c4e9d
6 changed files with 427 additions and 1 deletions

View file

@ -237,6 +237,27 @@ Auth pattern: server component layout calls `verifyAdminSession()` → redirects
--- ---
## Monitoring & Alerting
Alerts go to Telegram (tiaBaby_Bot) via `src/lib/alert.ts` → `sendAlert(level, title, detail?, opts?)`. Best-effort, never throws.
| Signal | Where | How it fires |
|--------|-------|--------------|
| Site/health up-down | **Uptime Kuma** (external, in Dokploy) | Pings `GET /api/healthz` (public, 200/503). Kuma handles flip detection + recovery |
| Backup fail / empty | `POST /api/cron/backup` | Alerts on exception, and warns if the gzipped dump < 1KB. Success is a silent confirmation |
| Error spikes | `/api/cron/monitor` | Rising-edge: errors in last 1h ≥ 5 **and** > 2× the prior hour. Stateless (no re-alert on flat rate) |
| Internal health | `/api/cron/monitor` | DB unreachable, no migrations, missing integration env |
| Visitor digest | `/api/cron/visitor-summary` | Polls Umami REST API (login → stats/metrics/active), posts digest. `?hours=N` window |
**Cron endpoints** all require the `x-cron-secret: $CRON_SECRET` header (same as backup). Schedule in Dokploy:
- `monitor` — hourly
- `visitor-summary` — daily (or `?hours=1` hourly during launch)
- `backup` — daily (existing)
**Test the Telegram wiring:** `GET /api/cron/monitor?test=1` with the cron-secret header sends a test ping.
---
## Data Storage Rules ## Data Storage Rules
| Data Type | Storage | API | | Data Type | Storage | API |
@ -277,7 +298,13 @@ Set in `.env.local` for development, Dokploy dashboard for production.
| `RESEND_API_KEY` | ✅ | Resend API key for transactional email | | `RESEND_API_KEY` | ✅ | Resend API key for transactional email |
| `EMAIL_FROM` | ✅ | Sender address (e.g. `Tia <tia@manohargupta.com>`) | | `EMAIL_FROM` | ✅ | Sender address (e.g. `Tia <tia@manohargupta.com>`) |
| `NEXT_PUBLIC_APP_URL` | ✅ | Full app URL (e.g. `https://tia.manohargupta.com`) | | `NEXT_PUBLIC_APP_URL` | ✅ | Full app URL (e.g. `https://tia.manohargupta.com`) |
| `CRON_SECRET` | ✅ | Secret for cron backup endpoint | | `CRON_SECRET` | ✅ | Secret for cron endpoints (backup, monitor, visitor-summary) — sent as `x-cron-secret` header |
| `TELEGRAM_BOT_TOKEN` | ✅ | tiaBaby_Bot token from @BotFather — operational alerts |
| `TELEGRAM_CHAT_ID` | ✅ | Chat/group/channel id alerts post to (see `src/lib/alert.ts` header for how to get it) |
| `UMAMI_BASE_URL` | — | Umami instance (default `https://analytics.manohargupta.com`) |
| `UMAMI_USERNAME` | ✅ | Umami login — for the visitor-summary cron |
| `UMAMI_PASSWORD` | ✅ | Umami password |
| `UMAMI_WEBSITE_ID` | — | Umami website id (default Tia's id) |
--- ---

View file

@ -4,6 +4,10 @@ import { promisify } from "util";
import { S3Client, PutObjectCommand, DeleteObjectCommand, ListObjectsV2Command } from "@aws-sdk/client-s3"; import { S3Client, PutObjectCommand, DeleteObjectCommand, ListObjectsV2Command } from "@aws-sdk/client-s3";
import fs from "fs/promises"; import fs from "fs/promises";
import { gzip } from "zlib"; import { gzip } from "zlib";
import { sendAlert } from "@/lib/alert";
// Below this, a gzipped dump almost certainly means an empty/failed pg_dump.
const MIN_BACKUP_BYTES = 1024;
const execAsync = promisify(exec); const execAsync = promisify(exec);
const gzipAsync = promisify(gzip); const gzipAsync = promisify(gzip);
@ -63,9 +67,26 @@ export async function POST(request: Request) {
// 5. Cleanup local // 5. Cleanup local
await fs.unlink("/tmp/dump.sql").catch(() => {}); await fs.unlink("/tmp/dump.sql").catch(() => {});
const sizeKb = Math.round(compressed.length / 1024);
// Suspiciously small dump — almost certainly an empty or broken backup.
if (compressed.length < MIN_BACKUP_BYTES) {
await sendAlert("error", "Backup looks empty", `Compressed dump is only ${compressed.length} bytes`, {
fields: { File: filename },
});
} else {
await sendAlert("info", "Backup completed", undefined, {
fields: { File: filename, Size: `${sizeKb} KB` },
silent: true, // daily confirmation — no need to buzz
});
}
return NextResponse.json({ success: true, filename, size: compressed.length }); return NextResponse.json({ success: true, filename, size: compressed.length });
} catch (e) { } catch (e) {
console.error("Backup failed:", e); console.error("Backup failed:", e);
await sendAlert("fatal", "Database backup FAILED", String(e).slice(0, 500), {
fields: { File: filename },
});
return NextResponse.json({ error: String(e) }, { status: 500 }); return NextResponse.json({ error: String(e) }, { status: 500 });
} }
} }

View file

@ -0,0 +1,103 @@
import { NextResponse } from "next/server";
import { sql } from "@/db";
import { sendAlert } from "@/lib/alert";
/**
 * Monitor cron — catches the failures Uptime Kuma can't see from the outside:
 *   - Error spikes (rising edge: last hour vs the hour before)
 *   - DB unreachable
 *   - Migrations missing / integration env not configured
 *
 * Uptime Kuma handles up/down flip detection by pinging /api/healthz, so this
 * focuses on internal signals. Recommended schedule: hourly.
 *
 * POST/GET /api/cron/monitor         (header: x-cron-secret)
 * GET      /api/cron/monitor?test=1  sends a test Telegram ping
 *
 * Stateless by design: error alerts use a rising-edge comparison, so a sustained
 * (flat) error rate won't re-alert every run — only genuine new spikes do.
 */
// Route-level `dynamic` export read by the framework — presumably opts this
// handler out of static caching so every cron call runs fresh (TODO confirm
// against the Next.js route segment config docs).
export const dynamic = "force-dynamic";
// Spike rule, part 1: the last hour must contain at least this many errors.
const SPIKE_MIN = 5; // compared with `>=` in runMonitor
// Spike rule, part 2: the last hour must also exceed the previous hour times this factor.
const SPIKE_MULTIPLIER = 2; // compared with `>` in runMonitor
/**
 * Checks the shared cron secret carried in the `x-cron-secret` request header.
 *
 * Fails closed: when `CRON_SECRET` is unset or empty nothing authenticates, so
 * a blank header can never match a blank (misconfigured) secret.
 *
 * @param request Incoming request whose headers are inspected.
 * @returns true only when a non-empty secret is configured and the header equals it.
 */
function authed(request: Request): boolean {
  const expected = process.env.CRON_SECRET;
  if (!expected) return false;
  return request.headers.get("x-cron-secret") === expected;
}
/** POST entry point: answers 401 unless the cron secret checks out, otherwise runs the monitor. */
export async function POST(request: Request) {
  const allowed = authed(request);
  if (allowed) {
    return runMonitor(request);
  }
  return NextResponse.json({ error: "Unauthorized" }, { status: 401 });
}
/** GET entry point: same gate and same work as POST, so the cron can be triggered with either verb. */
export async function GET(request: Request) {
  const allowed = authed(request);
  if (allowed) {
    return runMonitor(request);
  }
  return NextResponse.json({ error: "Unauthorized" }, { status: 401 });
}
/**
 * Runs the internal checks and sends one alert per problem found.
 *
 * Order of work:
 *   0. `?test=<anything non-empty>` short-circuits into a test ping.
 *   1. `SELECT 1` — on failure sends a fatal alert and returns immediately.
 *   2. Error spike — counts `error_events` rows in the last hour and the hour
 *      before; alerts when recent >= SPIKE_MIN and recent > prior * SPIKE_MULTIPLIER.
 *   3. Migrations — silent warning when `drizzle.__drizzle_migrations` is empty.
 *   4. Integration env — silent warning listing integrations with missing env vars.
 *
 * Query failures in steps 2 and 3 are non-fatal but are logged, so a broken
 * query cannot silently disable a check.
 *
 * @param request The already-authorised cron request (only its URL is read).
 * @returns JSON: `{ ok, test }` for the test ping, `{ ok: false, dbOk: false, fired }`
 *          when the DB is down, else `{ ok: true, dbOk: true, errors, fired }`.
 */
async function runMonitor(request: Request) {
  const { searchParams } = new URL(request.url);

  // Manual test ping — confirms the Telegram wiring end-to-end.
  if (searchParams.get("test")) {
    const ok = await sendAlert("info", "Monitor test ping", "Telegram alerting is wired correctly. 🎉");
    return NextResponse.json({ ok, test: true });
  }

  const fired: string[] = [];

  // 1. Database reachable? Every later check needs the DB, so stop here if not.
  try {
    await sql`SELECT 1`;
  } catch (e) {
    await sendAlert("fatal", "Database unreachable", String(e).slice(0, 300));
    return NextResponse.json({ ok: false, dbOk: false, fired: ["db_down"] });
  }

  // 2. Error spike — rising edge (last 1h vs the hour before it)
  let recent = 0;
  let prior = 0;
  try {
    const rows = await sql`
      SELECT
        COUNT(*) FILTER (WHERE created_at > NOW() - INTERVAL '1 hour')::int AS recent,
        COUNT(*) FILTER (WHERE created_at <= NOW() - INTERVAL '1 hour'
          AND created_at > NOW() - INTERVAL '2 hours')::int AS prior
      FROM error_events
    `;
    recent = Number(rows[0]?.recent) || 0;
    prior = Number(rows[0]?.prior) || 0;
    if (recent >= SPIKE_MIN && recent > prior * SPIKE_MULTIPLIER) {
      await sendAlert("error", "Error spike detected", undefined, {
        fields: { "Last hour": recent, "Previous hour": prior },
      });
      fired.push("error_spike");
    }
  } catch (e) {
    // error_events may not exist yet — non-fatal, but leave a trace so a
    // genuinely broken query does not hide behind the same catch.
    console.warn("monitor: error-spike check skipped:", e);
  }

  // 3. Migrations recorded
  try {
    const rows = await sql`SELECT COUNT(*)::int AS count FROM drizzle.__drizzle_migrations`;
    if ((Number(rows[0]?.count) || 0) === 0) {
      await sendAlert("warn", "No migrations recorded", "drizzle.__drizzle_migrations is empty", { silent: true });
      fired.push("no_migrations");
    }
  } catch (e) {
    // Non-fatal: log instead of swallowing so the skipped check is visible.
    console.warn("monitor: migrations check skipped:", e);
  }

  // 4. Integration env presence
  const missing: string[] = [];
  if (!(process.env.LITELLM_BASE_URL && process.env.LITELLM_API_KEY)) missing.push("AI Gateway");
  if (!(process.env.R2_ACCOUNT_ID && process.env.R2_ACCESS_KEY_ID && process.env.R2_BUCKET_NAME)) missing.push("R2 Storage");
  if (!process.env.RESEND_API_KEY) missing.push("Email");
  if (missing.length) {
    await sendAlert("warn", "Integration config missing", missing.join(", "), { silent: true });
    fired.push("config_missing");
  }

  return NextResponse.json({ ok: true, dbOk: true, errors: { recent, prior }, fired });
}

View file

@ -0,0 +1,152 @@
import { NextResponse } from "next/server";
import { sendAlert } from "@/lib/alert";
/**
 * Visitor summary cron — polls the self-hosted Umami API and posts a digest to
 * Telegram. Umami has no native webhook, so we authenticate and read its REST API.
 *
 * POST/GET /api/cron/visitor-summary           (header: x-cron-secret)
 * GET      /api/cron/visitor-summary?hours=24  window in hours (default 24, min 1, max 168)
 *
 * Env:
 *   UMAMI_BASE_URL    (default https://analytics.manohargupta.com)
 *   UMAMI_USERNAME    Umami login (admin or a read-only user)
 *   UMAMI_PASSWORD
 *   UMAMI_WEBSITE_ID  (default: the Tia website id)
 *
 * Recommended schedule: once a day. Set hours=24 for a daily digest, or call it
 * more often (e.g. hours=1 hourly) if you want a tighter pulse during launch.
 */
// Route-level `dynamic` export read by the framework — presumably opts this
// handler out of static caching (TODO confirm against the Next.js docs).
export const dynamic = "force-dynamic";
// Base URL of the Umami instance; the hosted default applies when UMAMI_BASE_URL is unset or empty.
const UMAMI_BASE = process.env.UMAMI_BASE_URL || "https://analytics.manohargupta.com";
// Umami website id to report on; the built-in id applies when UMAMI_WEBSITE_ID is unset or empty.
const WEBSITE_ID = process.env.UMAMI_WEBSITE_ID || "79444c19-ee31-4fab-baf5-f4e61098eeba";
/**
 * Checks the shared cron secret carried in the `x-cron-secret` request header.
 *
 * Fails closed: when `CRON_SECRET` is unset or empty nothing authenticates, so
 * a blank header can never match a blank (misconfigured) secret.
 *
 * @param request Incoming request whose headers are inspected.
 * @returns true only when a non-empty secret is configured and the header equals it.
 */
function authed(request: Request): boolean {
  const expected = process.env.CRON_SECRET;
  if (!expected) return false;
  return request.headers.get("x-cron-secret") === expected;
}
/** POST entry point: answers 401 unless the cron secret checks out, otherwise builds the digest. */
export async function POST(request: Request) {
  const allowed = authed(request);
  if (allowed) {
    return runSummary(request);
  }
  return NextResponse.json({ error: "Unauthorized" }, { status: 401 });
}
/** GET entry point: same gate and same work as POST; `?hours=N` is read downstream from the URL. */
export async function GET(request: Request) {
  const allowed = authed(request);
  if (allowed) {
    return runSummary(request);
  }
  return NextResponse.json({ error: "Unauthorized" }, { status: 401 });
}
/**
 * Authenticate with Umami and return a Bearer token, or null on failure.
 *
 * Posts UMAMI_USERNAME / UMAMI_PASSWORD to `${UMAMI_BASE}/api/auth/login`.
 * Never throws: network errors, timeouts, non-2xx responses, unparseable JSON
 * and responses without a string `token` are all logged and reported as null.
 *
 * @returns The token string from the login response, or null.
 */
async function umamiLogin(): Promise<string | null> {
  // Bound the call so a hung Umami instance cannot stall the cron request.
  const timeoutMs = 10_000;
  try {
    const res = await fetch(`${UMAMI_BASE}/api/auth/login`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        username: process.env.UMAMI_USERNAME,
        password: process.env.UMAMI_PASSWORD,
      }),
      signal: AbortSignal.timeout(timeoutMs),
    });
    if (!res.ok) {
      console.error("umamiLogin failed:", res.status, await res.text().catch(() => ""));
      return null;
    }
    // The payload is untrusted JSON: narrow it instead of trusting `any`, so
    // the declared `string | null` return type is actually honoured.
    const data: unknown = await res.json();
    const token =
      data !== null && typeof data === "object" && "token" in data
        ? (data as { token: unknown }).token
        : undefined;
    if (typeof token !== "string" || token === "") {
      console.error("umamiLogin failed: response did not contain a token");
      return null;
    }
    return token;
  } catch (e) {
    console.error("umamiLogin error:", e);
    return null;
  }
}
/**
 * Coerce an Umami metric to a plain number.
 *
 * Umami wraps metric values as `{ value, prev }`; this tolerates that shape as
 * well as raw numbers and numeric strings. Anything that does not resolve to a
 * finite number (undefined, NaN, non-numeric strings, ±Infinity) becomes 0, so
 * the digest never prints "NaN" — including when the input is itself a NaN number.
 *
 * @param v Raw metric value from the API response.
 * @returns A finite number; 0 when no finite number can be derived.
 */
const num = (v: unknown): number => {
  // Unwrap `{ value: … }` one level; everything else is used as-is.
  const raw = v !== null && typeof v === "object" && "value" in v ? (v as { value: unknown }).value : v;
  const n = typeof raw === "number" ? raw : Number(raw);
  return Number.isFinite(n) ? n : 0;
};
/**
 * Builds the visitor digest from Umami and posts it to Telegram.
 *
 * Steps:
 *   1. Resolve the window: `?hours=N`, defaulting to 24 and clamped to 1..168.
 *   2. Return 500 if the Umami credentials are not configured; return 502 (plus a
 *      silent warning alert) if the login fails.
 *   3. Fetch stats, top pages and the active-visitor count in parallel. Each
 *      fetch is independent and best-effort: a timeout, network error, non-2xx
 *      status or unparseable body is logged and that section falls back to
 *      empty/zero values.
 *   4. Format the digest and send it as a silent info alert.
 *
 * @param request The already-authorised cron request (only its URL is read).
 * @returns JSON `{ ok: true, delivered, summary }` on the normal path.
 */
async function runSummary(request: Request) {
  const { searchParams } = new URL(request.url);
  const hours = Math.max(1, Math.min(168, Number(searchParams.get("hours")) || 24));
  const endAt = Date.now();
  const startAt = endAt - hours * 3_600_000;

  if (!process.env.UMAMI_USERNAME || !process.env.UMAMI_PASSWORD) {
    return NextResponse.json({ error: "UMAMI_USERNAME / UMAMI_PASSWORD not set" }, { status: 500 });
  }

  const token = await umamiLogin();
  if (!token) {
    await sendAlert("warn", "Visitor summary unavailable", "Could not authenticate with Umami", { silent: true });
    return NextResponse.json({ error: "umami auth failed" }, { status: 502 });
  }

  const headers = { Authorization: `Bearer ${token}` };
  const qs = `startAt=${startAt}&endAt=${endAt}`;

  // One best-effort GET against the website's API; resolves to null on any failure.
  const getJson = async (label: string, path: string): Promise<unknown> => {
    try {
      const r = await fetch(`${UMAMI_BASE}/api/websites/${WEBSITE_ID}/${path}`, {
        headers,
        // Bound each call so a hung Umami instance cannot stall the cron request.
        signal: AbortSignal.timeout(10_000),
      });
      if (!r.ok) {
        // Do not parse an error payload as if it were data.
        console.error(`umami ${label} error:`, r.status);
        return null;
      }
      return await r.json();
    } catch (e) {
      console.error(`umami ${label} error:`, e);
      return null;
    }
  };

  // The three reads do not depend on each other, so run them concurrently.
  const [statsJson, pagesJson, activeJson] = await Promise.all([
    getJson("stats", `stats?${qs}`),
    getJson("metrics", `metrics?type=url&${qs}&limit=5`),
    getJson("active", "active"),
  ]);

  // Stats — guard against null / non-object JSON before reading properties.
  const stats: Record<string, unknown> =
    statsJson !== null && typeof statsJson === "object" ? (statsJson as Record<string, unknown>) : {};

  // Top pages — rows are read as { x | url, y | value }.
  const topPages: Array<{ x?: string; url?: string; y?: number; value?: number }> = Array.isArray(pagesJson)
    ? pagesJson
    : [];

  // Active right now — either a bare number or an object carrying `visitors` / `x`.
  const activeObj = activeJson as { visitors?: unknown; x?: unknown } | null | undefined;
  const active = typeof activeJson === "number" ? activeJson : num(activeObj?.visitors ?? activeObj?.x);

  const pageviews = num(stats.pageviews);
  const visitors = num(stats.visitors);
  const visits = num(stats.visits);
  const bounces = num(stats.bounces);
  const totaltime = num(stats.totaltime);
  const bounceRate = visits ? Math.round((bounces / visits) * 100) : 0;
  const avgVisit = visits ? Math.round(totaltime / visits) : 0; // seconds

  // Separate path and count so "/pricing" with 42 views does not render as "/pricing42".
  const pagesText =
    topPages
      .slice(0, 5)
      .map(p => `${p.x || p.url || "?"} — ${p.y ?? p.value ?? 0}`)
      .join("\n") || "—";

  const label = hours === 24 ? "Last 24h" : `Last ${hours}h`;
  const body = [
    `👥 Visitors: ${visitors}`,
    `🔁 Visits: ${visits}`,
    `📄 Page views: ${pageviews}`,
    `↩️ Bounce rate: ${bounceRate}%`,
    `⏱ Avg visit: ${avgVisit}s`,
    `🟢 Active now: ${active}`,
    ``,
    `Top pages:`,
    pagesText,
  ].join("\n");

  const delivered = await sendAlert("info", `📊 Visitor summary — ${label}`, body, { silent: true });

  return NextResponse.json({
    ok: true,
    delivered,
    summary: { visitors, visits, pageviews, bounceRate, avgVisit, active },
  });
}

View file

@ -0,0 +1,27 @@
import { NextResponse } from "next/server";
import { sql } from "@/db";
/**
 * GET /api/healthz — public, unauthenticated liveness probe.
 *
 * For external monitors (Uptime Kuma) and the Docker / Dokploy healthcheck.
 * Returns NO sensitive detail — only whether the app can reach the database.
 *   200 { status: "ok",   db: true,  ts: <ISO timestamp> }
 *   503 { status: "down", db: false, ts: <ISO timestamp> }
 */
export const dynamic = "force-dynamic";

/**
 * Liveness probe handler: issues `SELECT 1` and reports the outcome.
 *
 * Responds 200 `{ status: "ok", db: true, ts }` when the query succeeds and
 * 503 `{ status: "down", db: false, ts }` when it throws. Both responses carry
 * `Cache-Control: no-store`; the error itself is never exposed.
 */
export async function GET() {
  let dbReachable = true;
  try {
    await sql`SELECT 1`;
  } catch {
    dbReachable = false;
  }

  const payload = {
    status: dbReachable ? "ok" : "down",
    db: dbReachable,
    ts: new Date().toISOString(),
  };
  const noStore = { "Cache-Control": "no-store" };

  if (dbReachable) {
    return NextResponse.json(payload, { headers: noStore });
  }
  return NextResponse.json(payload, { status: 503, headers: noStore });
}

96
src/lib/alert.ts Normal file
View file

@ -0,0 +1,96 @@
/**
 * alert.ts — operational alerts to Telegram (tiaBaby_Bot).
 *
 * Best-effort: every function swallows its own errors so a failed alert can
 * never cascade into another failure (mirrors logError / logAudit).
 *
 * Env:
 *   TELEGRAM_BOT_TOKEN  from @BotFather
 *   TELEGRAM_CHAT_ID    chat / channel / group id to post into
 *
 * Get your chat id: send any message to the bot, then run
 *   curl "https://api.telegram.org/bot<TOKEN>/getUpdates"
 * and read result[].message.chat.id (negative for groups/channels).
 */
/** Severity of an alert; selects the emoji that prefixes the message title. */
export type AlertLevel = "info" | "warn" | "error" | "fatal";
/** Emoji shown before the title, one per level (typed so every level must have an entry). */
const EMOJI: Record<AlertLevel, string> = {
info: "🔵",
warn: "🟡",
error: "🔴",
fatal: "🚨",
};
/**
 * Make arbitrary text safe to embed in a Telegram HTML-parse-mode message by
 * replacing `&`, `<` and `>` with their entities. The ampersand is handled
 * first so the entities produced for `<` and `>` are not escaped a second time.
 */
function escapeHtml(s: string): string {
  const ampersandsDone = s.split("&").join("&amp;");
  const lessThanDone = ampersandsDone.split("<").join("&lt;");
  return lessThanDone.split(">").join("&gt;");
}
/**
 * Current time rendered for the Asia/Kolkata time zone in the en-IN locale:
 * day of month, short month name, and a 12-hour clock with 2-digit hour and minute.
 */
function istNow(): string {
  const formatter = new Intl.DateTimeFormat("en-IN", {
    timeZone: "Asia/Kolkata",
    day: "numeric",
    month: "short",
    hour: "2-digit",
    minute: "2-digit",
    hour12: true,
  });
  return formatter.format(new Date());
}
/** Optional extras accepted by sendAlert. */
export interface AlertOptions {
/** Key/value rows rendered one per line beneath the message, with the key in bold. Values are stringified and HTML-escaped. */
fields?: Record<string, string | number>;
/** Send without a notification sound (forwarded as Telegram's `disable_notification`). */
silent?: boolean;
}
/**
 * Send an operational alert to Telegram. Never throws.
 *
 * The message is HTML-formatted: a level emoji and bold title, the optional
 * detail (truncated to 3500 characters before escaping), optional
 * `<b>key:</b> value` rows, and an italic footer carrying the current IST time.
 * All caller-supplied text is HTML-escaped.
 *
 * Both message assembly and the HTTP call run inside the try block, and the
 * request is bounded by a timeout, so neither a bad argument nor a hung
 * Telegram API can stall or crash the caller.
 *
 * @param level  Severity; selects the leading emoji.
 * @param title  Headline, rendered in bold.
 * @param detail Optional body text.
 * @param opts   Optional field rows and the silent flag.
 * @returns true if delivered, false if skipped (env not set) or failed.
 */
export async function sendAlert(
  level: AlertLevel,
  title: string,
  detail?: string,
  opts: AlertOptions = {},
): Promise<boolean> {
  const token = process.env.TELEGRAM_BOT_TOKEN;
  const chatId = process.env.TELEGRAM_CHAT_ID;
  if (!token || !chatId) {
    console.warn("sendAlert: TELEGRAM_BOT_TOKEN / TELEGRAM_CHAT_ID not set — skipping");
    return false;
  }

  // Upper bound for the Telegram call; on expiry fetch rejects and we return false.
  const timeoutMs = 10000;

  try {
    const lines: string[] = [`${EMOJI[level]} <b>${escapeHtml(title)}</b>`];
    if (detail) lines.push("", escapeHtml(detail.slice(0, 3500)));
    if (opts.fields && Object.keys(opts.fields).length) {
      lines.push("");
      for (const [k, v] of Object.entries(opts.fields)) {
        lines.push(`<b>${escapeHtml(k)}:</b> ${escapeHtml(String(v))}`);
      }
    }
    lines.push("", `<i>🌸 Tia · ${istNow()} IST</i>`);

    const res = await fetch(`https://api.telegram.org/bot${token}/sendMessage`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        chat_id: chatId,
        text: lines.join("\n"),
        parse_mode: "HTML",
        disable_web_page_preview: true,
        disable_notification: !!opts.silent,
      }),
      signal: AbortSignal.timeout(timeoutMs),
    });
    if (!res.ok) {
      console.error("sendAlert: Telegram API error", res.status, await res.text().catch(() => ""));
      return false;
    }
    return true;
  } catch (e) {
    console.error("sendAlert failed:", e);
    return false;
  }
}