From d9f4a244f0f41841e455a0d11e1bc88e6de92f36 Mon Sep 17 00:00:00 2001 From: Mannu Date: Thu, 11 Jun 2026 10:30:52 +0530 Subject: [PATCH] Add infra-ops skill + Dokploy compose template (house-style source of truth) --- skills/infra-ops/SKILL.md | 71 +++++++++++++++++++++++++++ templates/dokploy-service.compose.yml | 43 ++++++++++++++++ 2 files changed, 114 insertions(+) create mode 100644 skills/infra-ops/SKILL.md create mode 100644 templates/dokploy-service.compose.yml diff --git a/skills/infra-ops/SKILL.md b/skills/infra-ops/SKILL.md new file mode 100644 index 0000000..d6c445a --- /dev/null +++ b/skills/infra-ops/SKILL.md @@ -0,0 +1,71 @@ +--- +name: infra-ops +description: > + Canonical conventions for Manohar's self-hosted infrastructure (Hetzner CX32 + + Dokploy + Tailscale + Forgejo). Use whenever creating or editing a service, + writing a Dokploy compose file, running SSH ops on the server, deploying via + Forgejo, or touching networking/UFW. Encodes the script-first workflow, compose + label requirements, overlay-vs-bridge networking rules, and the deploy loop so + these directions never need restating. +--- + +# Infra Ops — house style + +## Server +- Host `manohar-ubuntu`: Hetzner CX32 (4 vCPU / 7.6 GB / 75 GB), Ubuntu 24, Docker 29, Helsinki. +- SSH (Tailscale-only; user is always `root`): + ``` + SSH_AUTH_SOCK=$(launchctl getenv SSH_AUTH_SOCK) ssh -i ~/.ssh/id_ed25519 root@100.75.128.45 'bash -s' < /local/script.sh + ``` + - Tailscale IP `100.75.128.45` | public IPv4 `77.42.82.225` + - NEVER use `-t` (no pseudo-TTY). NEVER heredoc over SSH. + - Tailscale node idle = online, not down. Re-auth prompt is normal: approve, then kill+restart any wedged session. + +## Script-first (never deviate) +- Write scripts locally to `~/MyProjects/` via Desktop Commander `write_file` (NOT the sandbox). +- Execute remotely via the ssh pipe above (`'bash -s' < script.sh`). +- Never patch files in place on the server bypassing git. 
+- Backup-before-change: write a rollback script to `/opt/<service>/` before modifying configs. +- Dead-man's-switch for risky ops: a verify step that proves success before the change is trusted. + +## Dokploy compose conventions +Dokploy deploys compose as a **swarm stack**, so Traefik routing needs BOTH label sets: +- container-level `labels:` (docker provider) AND `deploy: labels:` (swarm provider) — mirror them exactly. +- No `container_name:` (swarm assigns names). +- Attach `dokploy-network` (`external: true`) for Traefik ingress. +- Deploy only through the Dokploy UI (not `docker stack deploy` by hand). +- `/etc/dokploy/compose/*/code/` is OVERWRITTEN on every redeploy — never treat it as source of truth. +- Standard Traefik labels (replace SVC / HOST / PORT): + ``` + traefik.enable=true + traefik.docker.network=dokploy-network + traefik.http.routers.SVC.rule=Host(`HOST`) + traefik.http.routers.SVC.entrypoints=websecure + traefik.http.routers.SVC.tls.certresolver=letsencrypt + traefik.http.services.SVC.loadbalancer.server.port=PORT + ``` +- Scaffold to copy: `templates/dokploy-service.compose.yml` + +## Networking (the rules that bite) +- `dokploy-network` is a swarm **OVERLAY** → containers on it CANNOT reach the host + (not `10.0.1.1`, not the Tailscale IP) and cannot cleanly egress to a tailnet peer. +- To reach the host OR a tailnet peer from a container, give it a second **bridge** + network; its gateway (`172.x.0.1`) is the host, which then routes/masquerades out. + Precedents: n8n → `172.19.0.1`; tiger-bridge `tiger-net` → `172.18.0.1`; ha-proxy uses this for tailnet egress. +- UFW: `ufw allow` covers bridge subnets (172.x). It does NOT expose docker-published + ports — those need `ufw-docker allow PORT` (DOCKER-USER chain). +- Always `ufw reload` after rule changes; verify with `iptables -L ufw-user-input -n -v`. + +## Deploy loop +- Git-driven services: source in `~/MyProjects/<repo>/`, Forgejo remote `git.manohargupta.com/manohar/<repo>`. 
+ Push → Forgejo webhook → Dokploy rebuild. No manual server steps. +- infra repo = local `~/MyProjects/deployments/` (remote `manohar/infra`), pushes over HTTPS:443. + Flat `*.compose.yml` files and per-service subfolders are both fine. +- Manual (non-Dokploy) stacks — Tiger `/opt/tiger/`, LiteLLM, code-server — compose lives in the repo, deployed by hand. + +## Working style +- Root cause before fix; state tradeoffs between fix paths. +- One mini-question / understanding check per major topic. +- Explicit risk flag before any change touching security, stability, or data. +- Token-efficient: batch ops, don't re-explain established context. +- Don't redo security hardening (UFW/ufw-docker/fail2ban/SSH) — it's done. diff --git a/templates/dokploy-service.compose.yml b/templates/dokploy-service.compose.yml new file mode 100644 index 0000000..63a88f6 --- /dev/null +++ b/templates/dokploy-service.compose.yml @@ -0,0 +1,43 @@ +# ============================================================================ +# TEMPLATE — Dokploy service on dokploy-network behind Traefik. +# Copy this, replace SVC / HOST / PORT / IMAGE, delete what you don't need. +# Dokploy deploys as a swarm stack, so BOTH label blocks below are required. +# See skills/infra-ops/SKILL.md for the full conventions. 
+# ============================================================================
+services:
+  SVC:
+    image: IMAGE
+    restart: unless-stopped  # NOTE(review): ignored by swarm stack deploys — confirm if deploy.restart_policy is needed
+    # environment:
+    #   KEY: ${KEY}  # inject secrets from the Dokploy env; never hard-code them
+    # volumes:
+    #   - SVC_data:/data
+    networks:
+      - dokploy-network  # ingress path for Traefik
+      # - SVC_internal  # optional bridge: DB / host / tailnet egress
+    # --- labels on the container (read by Traefik's docker provider) ---
+    labels:
+      - 'traefik.enable=true'
+      - 'traefik.docker.network=dokploy-network'
+      - 'traefik.http.routers.SVC.rule=Host(`HOST`)'
+      - 'traefik.http.routers.SVC.entrypoints=websecure'
+      - 'traefik.http.routers.SVC.tls.certresolver=letsencrypt'
+      - 'traefik.http.services.SVC.loadbalancer.server.port=PORT'
+    # --- labels on the swarm service (swarm provider); keep identical to the block above ---
+    deploy:
+      labels:
+        - 'traefik.enable=true'
+        - 'traefik.docker.network=dokploy-network'
+        - 'traefik.http.routers.SVC.rule=Host(`HOST`)'
+        - 'traefik.http.routers.SVC.entrypoints=websecure'
+        - 'traefik.http.routers.SVC.tls.certresolver=letsencrypt'
+        - 'traefik.http.services.SVC.loadbalancer.server.port=PORT'
+
+# volumes:
+#   SVC_data:
+
+networks:
+  dokploy-network:
+    external: true
+  # SVC_internal:
+  #   driver: bridge