# Paperless-ngx — docs.manohargupta.com
# OCR + full-text search for PDFs and Office docs (lender drafts, tariff schedules, etc.)
# 5 containers: webserver, redis broker, postgres, tika (Office), gotenberg (PDF render).
# Tika + Gotenberg add ~400 MB RAM but are essential for .docx/.xlsx indexing.
# First boot is slow (~90s) -- DB migrations run before the web UI becomes available.

services:
  # Redis: job queue between the web UI and the OCR/consumer worker
  paperless-broker:
    image: redis:7-alpine
    restart: unless-stopped
    volumes:
      - paperless_redis:/data
    networks:
      - paperless_internal

  # Postgres: document metadata, tags, correspondents.
  # (Per the volume notes on the main app, the search index lives in
  # paperless_data, not in this database.)
  paperless-db:
    image: postgres:16-alpine
    restart: unless-stopped
    environment:
      POSTGRES_DB: paperless
      POSTGRES_USER: paperless
      # Required. ":?" makes Compose abort at config time when the variable is
      # unset or empty, instead of interpolating an empty password.
      POSTGRES_PASSWORD: "${PAPERLESS_DB_PASSWORD:?PAPERLESS_DB_PASSWORD must be set}"
    volumes:
      - paperless_db_data:/var/lib/postgresql/data
    networks:
      - paperless_internal
    # The main app waits on this check (depends_on: service_healthy).
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U paperless -d paperless"]
      interval: 10s
      timeout: 5s
      retries: 5

  # Gotenberg: renders Office files (docx, xlsx) to PDF before OCR
  paperless-gotenberg:
    image: docker.io/gotenberg/gotenberg:8
    restart: unless-stopped
    command:
      - "gotenberg"
      - "--chromium-disable-javascript=true"  # Security: no JS execution
      - "--chromium-allow-list=file:///tmp/.*"  # Only allow local file access
    networks:
      - paperless_internal

  # Tika: extracts text from Office formats that Gotenberg can't handle alone
  # NOTE(review): image tag is "latest" -- consider pinning to a tested version.
  paperless-tika:
    image: docker.io/apache/tika:latest
    restart: unless-stopped
    networks:
      - paperless_internal

  # Main app: web UI + OCR worker + consumer (watches the consume volume)
  # NOTE(review): image tag is "latest" -- consider pinning to a tested version.
  paperless:
    image: ghcr.io/paperless-ngx/paperless-ngx:latest
    restart: unless-stopped
    depends_on:
      # Only the database has a healthcheck; the others are merely "started".
      paperless-db:
        condition: service_healthy
      paperless-broker:
        condition: service_started
      paperless-gotenberg:
        condition: service_started
      paperless-tika:
        condition: service_started
    environment:
      PAPERLESS_REDIS: redis://paperless-broker:6379
      PAPERLESS_DBHOST: paperless-db
      PAPERLESS_DBNAME: paperless
      PAPERLESS_DBUSER: paperless
      # Must match POSTGRES_PASSWORD on paperless-db; fails fast when missing.
      PAPERLESS_DBPASS: "${PAPERLESS_DB_PASSWORD:?PAPERLESS_DB_PASSWORD must be set}"
      # Secret key for Django session signing -- must be stable across restarts.
      # Required: Compose aborts when it is unset or empty.
      PAPERLESS_SECRET_KEY: "${PAPERLESS_SECRET_KEY:?PAPERLESS_SECRET_KEY must be set}"
      PAPERLESS_URL: https://docs.manohargupta.com
      # Office doc support via Tika + Gotenberg
      PAPERLESS_TIKA_ENABLED: "1"
      PAPERLESS_TIKA_GOTENBERG_ENDPOINT: http://paperless-gotenberg:3000
      PAPERLESS_TIKA_ENDPOINT: http://paperless-tika:9998
      # OCR: 'skip' means don't re-OCR docs that already have a text layer (faster)
      # Add '+hin' to language if you have Hindi documents: eng+hin (adds ~200 MB)
      PAPERLESS_OCR_LANGUAGE: eng
      PAPERLESS_OCR_MODE: skip
      PAPERLESS_TIME_ZONE: Asia/Kolkata
      USERMAP_UID: "1000"
      USERMAP_GID: "1000"
    volumes:
      - paperless_data:/usr/src/paperless/data  # search index, models
      - paperless_media:/usr/src/paperless/media  # original files + thumbnails
      - paperless_export:/usr/src/paperless/export  # manual export target
      - paperless_consume:/usr/src/paperless/consume  # drop files here to auto-ingest
    networks:
      # dokploy-network carries Traefik traffic; paperless_internal reaches the
      # broker, database, Tika and Gotenberg.
      - dokploy-network
      - paperless_internal
    # Traefik routing: HTTPS on docs.manohargupta.com -> container port 8000.
    # The same labels are repeated under deploy.labels below; keep both lists
    # in sync (presumably so they apply in both Compose and Swarm deployments).
    labels:
      - "traefik.enable=true"
      - "traefik.docker.network=dokploy-network"
      - "traefik.http.routers.paperless.rule=Host(`docs.manohargupta.com`)"
      - "traefik.http.routers.paperless.entrypoints=websecure"
      - "traefik.http.routers.paperless.tls.certresolver=letsencrypt"
      - "traefik.http.services.paperless.loadbalancer.server.port=8000"
    deploy:
      labels:
        - "traefik.enable=true"
        - "traefik.docker.network=dokploy-network"
        - "traefik.http.routers.paperless.rule=Host(`docs.manohargupta.com`)"
        - "traefik.http.routers.paperless.entrypoints=websecure"
        - "traefik.http.routers.paperless.tls.certresolver=letsencrypt"
        - "traefik.http.services.paperless.loadbalancer.server.port=8000"

# Named volumes, all with default settings (explicit empty mappings).
volumes:
  paperless_redis: {}    # mounted at /data in paperless-broker
  paperless_db_data: {}  # Postgres data directory
  paperless_data: {}     # search index, models
  paperless_media: {}    # original files + thumbnails
  paperless_export: {}   # manual export target
  paperless_consume: {}  # drop files here to auto-ingest

networks:
  # Private bridge shared by all five services in this stack.
  paperless_internal:
    driver: bridge
  # Pre-existing network created outside this file; only the main app joins it
  # so that Traefik can reach the web UI.
  dokploy-network:
    external: true