Compare commits

..

No commits in common. "main" and "manohar-patch-1" have entirely different histories.

9 changed files with 0 additions and 422 deletions

View file

@ -1,50 +0,0 @@
# ============================================================================
# ha-proxy -- ha.manohargupta.com (runs on HETZNER via DOKPLOY)
# ----------------------------------------------------------------------------
# Home Assistant runs at HOME. This stack is ONLY a reverse-proxy bridge:
#
# Browser --TLS--> Traefik (Hetzner) --> ha-proxy (nginx) --tailnet--> HA @ home
#
# WHY nginx and not a plain Traefik route to the home IP:
# - Traefik (Dokploy's) sits on the dokploy-network swarm OVERLAY, which can't
# cleanly egress to a tailnet peer. nginx here is dual-homed: it takes
# ingress from Traefik on dokploy-network, and egresses to the home box
# over a local BRIDGE network (ha_egress) whose gateway is the Hetzner host
# -- the host then routes to tailscale0. This is the same dokploy-network +
# bridge pattern your n8n stack already uses successfully.
# - nginx also handles the WebSocket upgrade HA's frontend depends on.
#
# RAM cost on Hetzner: ~10-15 MB (nginx:alpine). The heavy part stays home.
# ============================================================================
# Compose stack for the ha-proxy bridge: a single nginx container attached to
# both the Traefik ingress network and a local bridge network used for egress.
services:
  ha-proxy:
    image: nginx:1.27-alpine
    restart: unless-stopped
    volumes:
      # Proxy configuration is mounted read-only from the repo checkout.
      - ./nginx.conf:/etc/nginx/nginx.conf:ro
    networks:
      - dokploy-network  # ingress: Traefik routes ha.manohargupta.com here
      - ha_egress  # egress: container -> host -> tailscale0 -> home box
    # --- Container-level labels (docker provider) ---
    labels:
      - "traefik.enable=true"
      - "traefik.docker.network=dokploy-network"
      - "traefik.http.routers.ha.rule=Host(`ha.manohargupta.com`)"
      - "traefik.http.routers.ha.entrypoints=websecure"
      - "traefik.http.routers.ha.tls.certresolver=letsencrypt"
      - "traefik.http.services.ha.loadbalancer.server.port=80"
    # --- Service-level labels (swarm provider) -- Dokploy deploys as swarm stack ---
    # Keep this list an exact mirror of the container-level labels above.
    deploy:
      labels:
        - "traefik.enable=true"
        - "traefik.docker.network=dokploy-network"
        - "traefik.http.routers.ha.rule=Host(`ha.manohargupta.com`)"
        - "traefik.http.routers.ha.entrypoints=websecure"
        - "traefik.http.routers.ha.tls.certresolver=letsencrypt"
        - "traefik.http.services.ha.loadbalancer.server.port=80"

networks:
  # Pre-existing network shared with Traefik; not created by this stack.
  dokploy-network:
    external: true
  # Stack-local bridge used only for outbound traffic towards the tailnet.
  ha_egress:
    driver: bridge

View file

@ -1,47 +0,0 @@
# nginx.conf -- ha-proxy (Hetzner, behind Traefik)
# Forwards ha.manohargupta.com -> Home Assistant on the home box over Tailscale.
# Traefik terminates TLS; this listens plain HTTP on :80 inside the network.
worker_processes 1;

events {
    worker_connections 256;
}

http {
    # --- WebSocket upgrade plumbing -------------------------------------
    # HA's frontend uses a persistent WebSocket (/api/websocket). Without this
    # map the UI loads then hangs "Connecting...". The map sets the Connection
    # header to "upgrade" only when the client requested an upgrade.
    map $http_upgrade $connection_upgrade {
        default upgrade;
        ''      close;
    }

    # --- Forwarded-header pass-through ----------------------------------
    # Traefik sits in front of this proxy and terminates TLS, so this server
    # only ever sees plain HTTP from Traefik's container address:
    #   * $scheme is always "http" here, which would tell HA the browser is
    #     on an insecure connection;
    #   * $proxy_add_x_forwarded_for would append Traefik's container IP to
    #     the chain, placing an internal address after the real client.
    #     NOTE(review): HA is expected to pick the right-most entry that is
    #     not in trusted_proxies as the client -- confirm against the HA
    #     `http` integration docs.
    # Forward what Traefik supplied instead, falling back to the local values
    # only when the header is absent (e.g. a direct in-network request).
    map $http_x_forwarded_for $upstream_forwarded_for {
        default $http_x_forwarded_for;
        ''      $remote_addr;
    }
    map $http_x_forwarded_proto $upstream_forwarded_proto {
        default $http_x_forwarded_proto;
        ''      $scheme;
    }

    # !!! EDIT THIS: the home box's TAILSCALE IP (100.x.y.z), port 8123 !!!
    # Find it after the home box joins your tailnet: `tailscale ip -4` on that box.
    upstream homeassistant {
        server 100.XX.XX.XX:8123;
    }

    server {
        listen 80;
        server_name ha.manohargupta.com;

        location / {
            proxy_pass http://homeassistant;
            proxy_http_version 1.1;

            # WebSocket upgrade
            proxy_set_header Upgrade $http_upgrade;
            proxy_set_header Connection $connection_upgrade;

            # Preserve host + client info so HA's trusted_proxies check passes
            proxy_set_header Host $host;
            proxy_set_header X-Forwarded-For $upstream_forwarded_for;
            proxy_set_header X-Forwarded-Proto $upstream_forwarded_proto;

            # HA long-lived connections: don't cut them off early
            proxy_read_timeout 3600s;
            proxy_send_timeout 3600s;
            proxy_buffering off;
        }
    }
}

View file

@ -1,34 +0,0 @@
# Home Assistant — ha.manohargupta.com
HA runs **at home**; Hetzner only proxies the subdomain. Two deploy targets:
- `home-assistant/` → HOME BOX (Pi/N100), plain `docker compose up -d`. NOT Dokploy.
- `ha-proxy/` → HETZNER via Dokploy (nginx reverse-proxy over Tailscale).
## Deploy order
1. **Home box**: `docker compose up -d` → onboard HA at `http://<lan-ip>:8123` →
paste `configuration.snippet.yaml` into `config/configuration.yaml` → restart.
2. Join home box to Tailscale; `tailscale ip -4` → note the 100.x IP.
`ufw allow in on tailscale0 to any port 8123` on the home box.
3. **DNS**: `ha` A-record → `77.42.82.225` (done).
4. **Hetzner**: set the home tailnet IP in `ha-proxy/nginx.conf`, deploy `ha-proxy`
as a Dokploy Compose app.
## Dead-man's-switch — verify BEFORE trusting the cert flow
From inside the running ha-proxy container:
```
wget -qO- http://100.XX.XX.XX:8123 | head -c 200
```
HTML back → good. Hang/refused → the host isn't forwarding container→tailnet traffic;
check that `ip route get 100.XX.XX.XX` resolves via tailscale0 and that the Docker bridge subnet is MASQUERADEd.
## Why this shape
- HA needs `network_mode: host` (mDNS/Matter multicast + LAN RTSP for Tapo).
- An HA instance in Helsinki cannot reach home-LAN cameras — hence home box.
- ha-proxy is dual-homed (dokploy-network ingress + bridge egress), same pattern
as the n8n stack, because the swarm overlay can't egress to a tailnet peer.
## Security (flagged)
Public subdomain = HA login is internet-facing. Enable HA 2FA; consider Authentik
forward-auth later. Tailnet-only access (skip the public route) is safer if you
only reach HA from your own devices.

View file

@ -1,18 +0,0 @@
# configuration.yaml SNIPPET -- paste into ./config/configuration.yaml on the HOME box
# ----------------------------------------------------------------------------
# WHY this is required:
# HA refuses proxied requests by default (anti-spoofing). When ha-proxy forwards
# from Hetzner over the tailnet, the request reaches HA with a SOURCE IP equal to
# the Hetzner node's tailnet IP (after the host masquerades it out tailscale0).
# You must whitelist that IP, or every page load fails with HTTP 400
# "received from untrusted proxy / IP address not allowed".
#
# If you hit a 400, check the HA log -- it prints the exact rejected IP. Put THAT
# IP in trusted_proxies (it should be 100.75.128.45, your Hetzner tailnet IP).
# Home Assistant `http` integration settings for running behind ha-proxy.
http:
  # Accept the client address carried in X-Forwarded-For, but only from the
  # proxies listed under trusted_proxies.
  use_x_forwarded_for: true
  trusted_proxies:
    - 100.75.128.45  # Hetzner (manohar-ubuntu) tailnet IP = the proxy's source
    - 127.0.0.1
    - '::1'  # quoted so the leading colons are never read as YAML syntax

View file

@ -1,46 +0,0 @@
# ============================================================================
# Home Assistant + Matter Server -- runs on the HOME BOX (Pi 5 / N100)
# ----------------------------------------------------------------------------
# THIS DOES NOT RUN ON HETZNER / DOKPLOY.
# It lives in the infra repo for source-control + documentation, but it is
# deployed by hand on the home machine: `docker compose up -d`
#
# WHY host networking (network_mode: host) is mandatory here:
# - Tapo cameras are reached over your LAN (RTSP 554 / ONVIF 2020). HA must
# sit on the same L2 segment to discover + stream them.
# - mDNS / SSDP / Matter commissioning are MULTICAST. Bridge networking
# drops multicast at the container boundary, so discovery silently fails.
# With host mode the container shares the home box's network stack directly.
# ============================================================================
# Two host-networked containers on the home box: Home Assistant itself and the
# Matter controller it connects to.
services:
  homeassistant:
    image: ghcr.io/home-assistant/home-assistant:stable
    container_name: homeassistant
    restart: unless-stopped
    network_mode: host  # see header note -- non-negotiable for HA
    volumes:
      - ./config:/config  # all HA state + configuration.yaml lives here
      - /etc/localtime:/etc/localtime:ro
      - /run/dbus:/run/dbus:ro  # lets HA see host Bluetooth/dbus (harmless if unused)
    environment:
      - TZ=Asia/Kolkata
    # NOTE: no `ports:` needed -- host mode already exposes :8123 on the box.
    # NOTE: no `privileged` -- only required if you later pass a USB Zigbee/Thread
    #       dongle, which would also need a `devices:` mapping. Not needed for
    #       WiFi / RTSP / Matter-over-WiFi devices.

  # --- Matter controller (you mentioned Matter devices) -------------------
  # HA talks to this over ws://localhost:5580 (add the "Matter" integration in
  # the HA UI and point it there). Comment this whole block out if you want to
  # hold off on Matter for now -- Tapo cameras do NOT need it.
  matter-server:
    image: ghcr.io/home-assistant-libs/python-matter-server:stable
    container_name: matter-server
    restart: unless-stopped
    network_mode: host  # Matter commissioning needs multicast too
    security_opt:
      - apparmor=unconfined  # required for the Matter stack's raw network access
    volumes:
      - ./matter-data:/data
    environment:
      - TZ=Asia/Kolkata

View file

@ -1,41 +0,0 @@
# =============================================================================
# 10-livesync.ini — CouchDB config overrides for Obsidian Self-hosted LiveSync
# Lands in /opt/couchdb/etc/local.d/ so it overrides base defaults.
# Reference: vrtmrz/obsidian-livesync "Setup your own server" docs.
# =============================================================================
[couchdb]
# Single machine, no clustering. Required for a standalone LiveSync server.
single_node = true
# Allow large notes/attachments (50 MB). Raise if you paste big PDFs/images.
max_document_size = 50000000

[chttpd]
# Listen on all interfaces inside the container (Traefik reaches it via the net).
bind_address = 0.0.0.0
# LiveSync streams revisions in big batches; default request cap is too small.
max_http_request_size = 4294967296
# Force every request to carry valid credentials — this is CouchDB's own auth,
# i.e. the reason we do NOT also put Traefik basicAuth in front of it.
require_valid_user = true
# CouchDB 3.2+ documents the CORS switch under [chttpd]; the [httpd] copy below
# is kept so nothing that still reads the old location changes behaviour.
enable_cors = true

[chttpd_auth]
require_valid_user = true

[httpd]
WWW-Authenticate = Basic realm="couchdb"
# Required in CouchDB 3.x — without this the [cors] section below is ignored.
enable_cors = true

# --- CORS -------------------------------------------------------------------
# LiveSync runs inside browsers with DIFFERENT origins:
#   desktop Obsidian -> app://obsidian.md
#   mobile Obsidian  -> capacitor://localhost
#   Ignis            -> https://notes.manohargupta.com
# So we must allow cross-origin credentialed requests from all of them.
# The origins are listed explicitly: CouchDB's CORS docs state that
# `origins = *` cannot be combined with `credentials = true`, and a wildcard
# would let any website make credentialed requests. Add new clients here.
[cors]
origins = app://obsidian.md,capacitor://localhost,https://notes.manohargupta.com
credentials = true
headers = accept, authorization, content-type, origin, referer
methods = GET, PUT, POST, HEAD, DELETE
max_age = 3600

View file

@ -1,72 +0,0 @@
# =============================================================================
# obsidian-sync : CouchDB hub for Obsidian Self-hosted LiveSync
# Deploy as a Dokploy "Compose" app. Image is pulled (no build).
# Public endpoint: https://couchdb.manohargupta.com (CouchDB-native auth only)
# =============================================================================
# Single-service stack: CouchDB published through Traefik on dokploy-network.
services:
  couchdb:
    # Pin a known-good 3.x line. LiveSync needs >=3.2; 3.3 is well-tested.
    image: couchdb:3.3
    # NOTE(review): the house convention for Dokploy stacks is to omit
    # container_name (swarm assigns names). Kept here because other tooling may
    # address the container by this name — confirm before removing.
    container_name: obsidian-couchdb
    restart: unless-stopped
    # Run as the couchdb user (uid 5984) from the start. This skips the
    # entrypoint's "find /opt/couchdb \! -user couchdb -exec chown {}" step,
    # which fails with EROFS when the :ro bind-mounted ini file is encountered —
    # causing an immediate silent exit before any log is written.
    user: "5984:5984"
    # Admin credentials come from Dokploy's Environment tab (NOT hard-coded here,
    # so they never land in git). Set COUCHDB_USER / COUCHDB_PASSWORD in the UI.
    environment:
      - COUCHDB_USER=${COUCHDB_USER}
      - COUCHDB_PASSWORD=${COUCHDB_PASSWORD}
    volumes:
      # Persistent database files (named volume, survives redeploys).
      - couchdb-data:/opt/couchdb/data
      # LiveSync-tuned config. Mounted into the *.d override dir so it layers on
      # top of CouchDB's defaults without us editing the base file.
      - ./couchdb/local.ini:/opt/couchdb/etc/local.d/10-livesync.ini:ro
    # mem_limit intentionally omitted: on a swap-heavy box the cgroup OOM killer
    # fires before the Erlang VM writes its first log line (kernel kills it, so
    # docker inspect shows OOMKilled:false — misleading). Re-add once swap < 50%.
    healthcheck:
      # _up requires auth because require_valid_user = true covers all endpoints.
      # CMD-SHELL lets the container shell expand $COUCHDB_USER/$COUCHDB_PASSWORD
      # (the $$ in YAML becomes $ after Compose substitution, then the shell runs it).
      # The credential pair is double-quoted for the shell so a password
      # containing spaces or shell metacharacters is passed to curl intact.
      test:
        - CMD-SHELL
        - curl -f -u "$$COUCHDB_USER:$$COUCHDB_PASSWORD" http://localhost:5984/_up
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 30s
    networks:
      - dokploy-network
    # --- Container-level labels (docker provider) ---
    labels:
      - "traefik.enable=true"
      # --- HTTP router on :80 (kept to match the Dokploy convention seen on
      #     position-tracker). It only redirects to HTTPS, so Basic-auth
      #     credentials are never accepted over a cleartext connection.
      #     NOTE(review): Traefik's own ACME HTTP-01 handler is expected to
      #     keep answering challenges ahead of this router — confirm. ---
      - "traefik.http.middlewares.obsidian-couchdb-https.redirectscheme.scheme=https"
      - "traefik.http.middlewares.obsidian-couchdb-https.redirectscheme.permanent=true"
      - "traefik.http.routers.obsidian-couchdb-http.rule=Host(`couchdb.manohargupta.com`)"
      - "traefik.http.routers.obsidian-couchdb-http.entrypoints=web"
      - "traefik.http.routers.obsidian-couchdb-http.middlewares=obsidian-couchdb-https"
      # --- HTTPS router on :443 ---
      - "traefik.http.routers.obsidian-couchdb.rule=Host(`couchdb.manohargupta.com`)"
      - "traefik.http.routers.obsidian-couchdb.entrypoints=websecure"
      - "traefik.http.routers.obsidian-couchdb.tls=true"
      - "traefik.http.routers.obsidian-couchdb.tls.certresolver=letsencrypt"
      # --- Service: CouchDB listens on 5984 ---
      - "traefik.http.services.obsidian-couchdb.loadbalancer.server.port=5984"
      # Tell Traefik which network to reach the container on (overlay).
      - "traefik.docker.network=dokploy-network"
      # NOTE: deliberately NO basicauth middleware here. CouchDB does its own auth.
    # --- Service-level labels (swarm provider) — exact mirror of the list above,
    #     as the other Dokploy stacks in this repo do. ---
    deploy:
      labels:
        - "traefik.enable=true"
        - "traefik.http.middlewares.obsidian-couchdb-https.redirectscheme.scheme=https"
        - "traefik.http.middlewares.obsidian-couchdb-https.redirectscheme.permanent=true"
        - "traefik.http.routers.obsidian-couchdb-http.rule=Host(`couchdb.manohargupta.com`)"
        - "traefik.http.routers.obsidian-couchdb-http.entrypoints=web"
        - "traefik.http.routers.obsidian-couchdb-http.middlewares=obsidian-couchdb-https"
        - "traefik.http.routers.obsidian-couchdb.rule=Host(`couchdb.manohargupta.com`)"
        - "traefik.http.routers.obsidian-couchdb.entrypoints=websecure"
        - "traefik.http.routers.obsidian-couchdb.tls=true"
        - "traefik.http.routers.obsidian-couchdb.tls.certresolver=letsencrypt"
        - "traefik.http.services.obsidian-couchdb.loadbalancer.server.port=5984"
        - "traefik.docker.network=dokploy-network"

networks:
  dokploy-network:
    external: true

volumes:
  couchdb-data:

View file

@ -1,71 +0,0 @@
---
name: infra-ops
# Folded scalar: the indented lines below are joined into one description string.
description: >
  Canonical conventions for Manohar's self-hosted infrastructure (Hetzner CX32 +
  Dokploy + Tailscale + Forgejo). Use whenever creating or editing a service,
  writing a Dokploy compose file, running SSH ops on the server, deploying via
  Forgejo, or touching networking/UFW. Encodes the script-first workflow, compose
  label requirements, overlay-vs-bridge networking rules, and the deploy loop so
  these directions never need restating.
---
# Infra Ops — house style
## Server
- Host `manohar-ubuntu`: Hetzner CX32 (4 vCPU / 7.6 GB / 75 GB), Ubuntu 24, Docker 29, Helsinki.
- SSH (Tailscale-only; user is always `root`):
```
SSH_AUTH_SOCK=$(launchctl getenv SSH_AUTH_SOCK) ssh -i ~/.ssh/id_ed25519 root@100.75.128.45 'bash -s' < /local/script.sh
```
- Tailscale IP `100.75.128.45` | public IPv4 `77.42.82.225`
- NEVER use `-t` (no pseudo-TTY). NEVER heredoc over SSH.
- Tailscale node idle = online, not down. Re-auth prompt is normal: approve, then kill+restart any wedged session.
## Script-first (never deviate)
- Write scripts locally to `~/MyProjects/` via Desktop Commander `write_file` (NOT the sandbox).
- Execute remotely via the ssh pipe above (`'bash -s' < script.sh`).
- Never patch files in place on the server bypassing git.
- Backup-before-change: write a rollback script to `/opt/<service>/` before modifying configs.
- Dead-man's-switch for risky ops: a verify step that proves success before the change is trusted.
## Dokploy compose conventions
Dokploy deploys compose as a **swarm stack**, so Traefik routing needs BOTH label sets:
- container-level `labels:` (docker provider) AND `deploy: labels:` (swarm provider) — mirror them exactly.
- No `container_name:` (swarm assigns names).
- Attach `dokploy-network` (`external: true`) for Traefik ingress.
- Deploy only through the Dokploy UI (not `docker stack deploy` by hand).
- `/etc/dokploy/compose/*/code/` is OVERWRITTEN on every redeploy — never treat it as source of truth.
- Standard Traefik labels (replace SVC / HOST / PORT):
```
traefik.enable=true
traefik.docker.network=dokploy-network
traefik.http.routers.SVC.rule=Host(`HOST`)
traefik.http.routers.SVC.entrypoints=websecure
traefik.http.routers.SVC.tls.certresolver=letsencrypt
traefik.http.services.SVC.loadbalancer.server.port=PORT
```
- Scaffold to copy: `templates/dokploy-service.compose.yml`
## Networking (the rules that bite)
- `dokploy-network` is a swarm **OVERLAY** → containers on it CANNOT reach the host
(not `10.0.1.1`, not the Tailscale IP) and cannot cleanly egress to a tailnet peer.
- To reach the host OR a tailnet peer from a container, give it a second **bridge**
network; its gateway (`172.x.0.1`) is the host, which then routes/masquerades out.
Precedents: n8n → `172.19.0.1`; tiger-bridge `tiger-net` → `172.18.0.1`; ha-proxy uses this for tailnet egress.
- UFW: `ufw allow` covers bridge subnets (172.x). It does NOT expose docker-published
ports — those need `ufw-docker allow PORT` (DOCKER-USER chain).
- Always `ufw reload` after rule changes; verify with `iptables -L ufw-user-input -n -v`.
## Deploy loop
- Git-driven services: source in `~/MyProjects/<svc>/`, Forgejo remote `git.manohargupta.com/manohar/<svc>`.
Push → Forgejo webhook → Dokploy rebuild. No manual server steps.
- infra repo = local `~/MyProjects/deployments/` (remote `manohar/infra`), pushes over HTTPS:443.
Flat `*.compose.yml` files and per-service subfolders are both fine.
- Manual (non-Dokploy) stacks — Tiger `/opt/tiger/`, LiteLLM, code-server — compose lives in the repo, deployed by hand.
## Working style
- Root cause before fix; state tradeoffs between fix paths.
- One mini-question / understanding check per major topic.
- Explicit risk flag before any change touching security, stability, or data.
- Token-efficient: batch ops, don't re-explain established context.
- Don't redo security hardening (UFW/ufw-docker/fail2ban/SSH) — it's done.

View file

@ -1,43 +0,0 @@
# ============================================================================
# TEMPLATE — Dokploy service on dokploy-network behind Traefik.
# Copy this, replace SVC / HOST / PORT / IMAGE, delete what you don't need.
# Dokploy deploys as a swarm stack, so BOTH label blocks below are required.
# See skills/infra-ops/SKILL.md for the full conventions.
# ============================================================================
# Skeleton stack: one service on dokploy-network with both Traefik label sets.
# Commented-out entries are optional extras; uncomment and rename as needed.
services:
  SVC:
    image: IMAGE
    restart: unless-stopped
    # environment:
    #   KEY: ${KEY}  # secrets via Dokploy env, never hard-coded
    # volumes:
    #   - SVC_data:/data
    networks:
      - dokploy-network  # Traefik ingress
      # - SVC_internal  # add a bridge for DB / host / tailnet egress
    # --- container-level labels (docker provider) ---
    labels:
      - "traefik.enable=true"
      - "traefik.docker.network=dokploy-network"
      - "traefik.http.routers.SVC.rule=Host(`HOST`)"
      - "traefik.http.routers.SVC.entrypoints=websecure"
      - "traefik.http.routers.SVC.tls.certresolver=letsencrypt"
      - "traefik.http.services.SVC.loadbalancer.server.port=PORT"
    # --- service-level labels (swarm provider) — mirror of the above ---
    deploy:
      labels:
        - "traefik.enable=true"
        - "traefik.docker.network=dokploy-network"
        - "traefik.http.routers.SVC.rule=Host(`HOST`)"
        - "traefik.http.routers.SVC.entrypoints=websecure"
        - "traefik.http.routers.SVC.tls.certresolver=letsencrypt"
        - "traefik.http.services.SVC.loadbalancer.server.port=PORT"

# volumes:
#   SVC_data:

networks:
  dokploy-network:
    external: true
  # SVC_internal:
  #   driver: bridge