---
# Paperless-ngx — docs.manohargupta.com
# OCR + full-text search for PDFs and Office docs (lender drafts, tariff schedules, etc.)
# 5 containers: webserver, redis broker, postgres, tika (Office), gotenberg (PDF render).
# Tika + Gotenberg add ~400 MB RAM but are essential for .docx/.xlsx indexing.
# First boot is slow (~90s) -- DB migrations run before the web UI becomes available.
#
# Required variables (from the deployment environment or a .env file):
#   PAPERLESS_DB_PASSWORD  password shared by the postgres container and the web app
#   PAPERLESS_SECRET_KEY   Django session-signing key
# Both use the ${VAR:?message} form below, so Compose refuses to start the stack
# when either is unset or empty instead of substituting an empty string.

services:
  # Redis: job queue between the web UI and the OCR/consumer worker
  paperless-broker:
    image: redis:7-alpine
    restart: unless-stopped
    volumes:
      - paperless_redis:/data
    networks:
      - paperless_internal

  # Postgres: document metadata, tags, correspondents
  paperless-db:
    image: postgres:16-alpine
    restart: unless-stopped
    environment:
      POSTGRES_DB: paperless
      POSTGRES_USER: paperless
      POSTGRES_PASSWORD: "${PAPERLESS_DB_PASSWORD:?set PAPERLESS_DB_PASSWORD in the environment or .env}"
    volumes:
      - paperless_db_data:/var/lib/postgresql/data
    networks:
      - paperless_internal
    # The web app waits on this check (depends_on ... condition: service_healthy).
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U paperless -d paperless"]
      interval: 10s
      timeout: 5s
      retries: 5

  # Gotenberg: renders Office files (docx, xlsx) to PDF before OCR
  paperless-gotenberg:
    image: docker.io/gotenberg/gotenberg:8
    restart: unless-stopped
    command:
      - "gotenberg"
      - "--chromium-disable-javascript=true"  # Security: no JS execution
      - "--chromium-allow-list=file:///tmp/.*"  # Only allow local file access
    networks:
      - paperless_internal

  # Tika: extracts text from Office formats that Gotenberg can't handle alone
  paperless-tika:
    # NOTE(review): `latest` floats between pulls; consider pinning a tested tag.
    image: docker.io/apache/tika:latest
    restart: unless-stopped
    networks:
      - paperless_internal

  # Main app: web UI + OCR worker + consumer (watches the consume volume)
  paperless:
    # NOTE(review): `latest` floats between pulls; consider pinning a tested tag.
    image: ghcr.io/paperless-ngx/paperless-ngx:latest
    restart: unless-stopped
    depends_on:
      # Only Postgres defines a healthcheck, so it is the only dependency that
      # can be gated on service_healthy; the others are gated on start only.
      paperless-db:
        condition: service_healthy
      paperless-broker:
        condition: service_started
      paperless-gotenberg:
        condition: service_started
      paperless-tika:
        condition: service_started
    environment:
      PAPERLESS_REDIS: redis://paperless-broker:6379
      PAPERLESS_DBHOST: paperless-db
      PAPERLESS_DBNAME: paperless
      PAPERLESS_DBUSER: paperless
      # Must match POSTGRES_PASSWORD on paperless-db (same variable).
      PAPERLESS_DBPASS: "${PAPERLESS_DB_PASSWORD:?set PAPERLESS_DB_PASSWORD in the environment or .env}"
      # Secret key for Django session signing -- must be stable across restarts
      PAPERLESS_SECRET_KEY: "${PAPERLESS_SECRET_KEY:?set PAPERLESS_SECRET_KEY in the environment or .env}"
      PAPERLESS_URL: https://docs.manohargupta.com
      # Office doc support via Tika + Gotenberg
      PAPERLESS_TIKA_ENABLED: "1"
      PAPERLESS_TIKA_GOTENBERG_ENDPOINT: http://paperless-gotenberg:3000
      PAPERLESS_TIKA_ENDPOINT: http://paperless-tika:9998
      # OCR: 'skip' means don't re-OCR docs that already have a text layer (faster)
      # Add '+hin' to language if you have Hindi documents: eng+hin (adds ~200 MB)
      PAPERLESS_OCR_LANGUAGE: eng
      PAPERLESS_OCR_MODE: skip
      PAPERLESS_TIME_ZONE: Asia/Kolkata
      # Quoted so the IDs reach the container as strings, not YAML integers.
      USERMAP_UID: "1000"
      USERMAP_GID: "1000"
    volumes:
      - paperless_data:/usr/src/paperless/data  # search index, models
      - paperless_media:/usr/src/paperless/media  # original files + thumbnails
      - paperless_export:/usr/src/paperless/export  # manual export target
      - paperless_consume:/usr/src/paperless/consume  # drop files here to auto-ingest
    networks:
      # The only service attached to dokploy-network (where Traefik is told to
      # reach it via traefik.docker.network); every other service is on
      # paperless_internal only.
      - dokploy-network
      - paperless_internal
    # Container-level labels.
    labels:
      - "traefik.enable=true"
      - "traefik.docker.network=dokploy-network"
      - "traefik.http.routers.paperless.rule=Host(`docs.manohargupta.com`)"
      - "traefik.http.routers.paperless.entrypoints=websecure"
      - "traefik.http.routers.paperless.tls.certresolver=letsencrypt"
      - "traefik.http.services.paperless.loadbalancer.server.port=8000"
    # Service-level copy of the same labels. NOTE(review): presumably kept so
    # Traefik finds the router whether the stack runs as plain Compose or as a
    # Swarm service -- confirm which mode Dokploy uses before removing either
    # copy, and keep the two lists identical.
    deploy:
      labels:
        - "traefik.enable=true"
        - "traefik.docker.network=dokploy-network"
        - "traefik.http.routers.paperless.rule=Host(`docs.manohargupta.com`)"
        - "traefik.http.routers.paperless.entrypoints=websecure"
        - "traefik.http.routers.paperless.tls.certresolver=letsencrypt"
        - "traefik.http.services.paperless.loadbalancer.server.port=8000"

# Named volumes with default settings.
volumes:
  paperless_redis:
  paperless_db_data:
  paperless_data:
  paperless_media:
  paperless_export:
  paperless_consume:

networks:
  # Declared external: must already exist; this file does not create it.
  dokploy-network:
    external: true
  # Bridge network created by this stack for traffic between its own services.
  paperless_internal:
    driver: bridge