infra/seed_changedetection.py

153 lines
5.2 KiB
Python

#!/usr/bin/env python3
"""
seed_changedetection.py -- adds Indian RE regulator pages to ChangeDetection.
These are pages with NO RSS feed -- we watch for any content change.
Notifications route via Apprise -> Telegram.
Run: python3 seed_changedetection.py
"""
import os
import sys

import requests
# ── Config ────────────────────────────────────────────────────────────────────
# Every setting can be overridden by an environment variable of the same name,
# so real credentials do not have to be written into this file. The literals
# below are only the fallbacks used when the variable is unset.
CD_URL = os.environ.get("CD_URL", "https://watch.manohargupta.com")
CD_USER = os.environ.get("CD_USER", "manohar")
# The password you set with htpasswd.
CD_PASS = os.environ.get("CD_PASS", "your_changedetection_password")
# Apprise notification URL -- change alerts go here -> Telegram
APPRISE_NOTIFY_URL = os.environ.get(
    "APPRISE_NOTIFY_URL",
    "https://manohar:your_apprise_password@notify.manohargupta.com/notify/apprise",
)
# ── Watch definitions ─────────────────────────────────────────────────────────
# Each entry is a 4-tuple: (title, url, check_interval_seconds, tag).
# Intervals are written as multiples of one hour (3600 s) for readability.
_HOUR = 3600

WATCHES = [
    # Regulators -- CERC every 2 hours, MNRE and SECI every 4 hours
    ("CERC Orders", "https://cercind.gov.in/orders.html", 2 * _HOUR, "regulatory"),
    ("CERC Regulations", "https://cercind.gov.in/regulations.html", 2 * _HOUR, "regulatory"),
    ("MNRE Notifications", "https://mnre.gov.in/notification", 4 * _HOUR, "regulatory"),
    ("MNRE Tenders", "https://mnre.gov.in/tender", 4 * _HOUR, "regulatory"),
    ("SECI Tenders", "https://seci.co.in/tenders.php", 4 * _HOUR, "regulatory"),
    ("SECI Results/Awards", "https://seci.co.in/tenders-awards.php", 4 * _HOUR, "regulatory"),
    # Policy -- press releases every 12 hours, PPAC data every 24 hours
    ("MoP Press Releases", "https://powermin.gov.in/en/content/press-release", 12 * _HOUR, "policy"),
    ("PPAC Petroleum Data", "https://ppac.gov.in/content/212_1_PricesandTaxes.aspx", 24 * _HOUR, "policy"),
]
# ── API helpers ───────────────────────────────────────────────────────────────
def api(method, path, **kwargs):
    """Send an HTTP request to the ChangeDetection instance at ``CD_URL``.

    Args:
        method: HTTP verb handed to ``requests.request`` (e.g. ``"GET"``).
        path: Path relative to ``CD_URL``; a leading slash is tolerated.
        **kwargs: Extra keyword arguments forwarded to ``requests.request``.
            ``headers`` are merged over the JSON content-type default, while
            ``auth``, ``timeout`` and ``verify`` replace the defaults below.

    Returns:
        The ``requests.Response`` object. The status code is not checked here;
        callers inspect it themselves.
    """
    # Merge caller headers instead of passing ``headers`` twice, which would
    # raise ``TypeError: got multiple values for keyword argument``.
    headers = {"Content-Type": "application/json"}
    headers.update(kwargs.pop("headers", None) or {})

    # Defaults the caller may override (same reason as above).
    kwargs.setdefault("auth", (CD_USER, CD_PASS))
    kwargs.setdefault("timeout", 15)
    kwargs.setdefault("verify", True)

    # Normalise the slashes so neither a trailing slash on CD_URL nor a
    # leading slash on ``path`` yields a double slash in the request URL.
    target = f"{CD_URL.rstrip('/')}/{path.lstrip('/')}"
    return requests.request(method, target, headers=headers, **kwargs)
def get_existing_urls():
    """Collect the URLs of all watches the server currently reports.

    Returns:
        A set with the ``"url"`` value of every watch in the listing (an empty
        string for entries lacking that key). If the listing request does not
        answer with HTTP 200, an error line is printed and an empty set is
        returned.
    """
    response = api("GET", "api/v1/watch")
    if response.status_code == 200:
        known = set()
        # The listing is a mapping; only its values (the watch records) matter.
        for record in response.json().values():
            known.add(record.get("url", ""))
        return known

    print(f"[ERROR] Could not fetch watches: {response.status_code}")
    return set()
def add_watch(title, url, interval, tag):
    """Create one watch on the ChangeDetection server.

    Args:
        title: Human-readable name for the watch.
        url: Page to monitor.
        interval: Time between checks, in seconds.
        tag: Tag assigned to the watch.

    Returns:
        A ``(success, message)`` tuple. ``success`` is True when the server
        answers 200 or 201; ``message`` is ``"added (id=...)"`` on success and
        ``"FAILED: ..."`` with the server's error text otherwise.
    """
    # Split the interval into hours/minutes/seconds. Using only
    # ``interval // 3600`` would silently drop any sub-hour remainder and
    # turn intervals under one hour into an all-zero schedule. Whole-hour
    # intervals still produce exactly the same payload as before.
    hours, leftover = divmod(interval, 3600)
    minutes, seconds = divmod(leftover, 60)

    payload = {
        "url": url,
        "title": title,
        "time_between_check": {
            "seconds": seconds,
            "minutes": minutes,
            "hours": hours,
            "days": 0,
            "weeks": 0,
        },
        "tag": tag,
        # Route all change notifications through Apprise
        "notification_urls": [APPRISE_NOTIFY_URL],
        # Use the Browserless sidecar for JS-rendered pages
        # NOTE(review): confirm this value is accepted by the deployed
        # ChangeDetection version's ``fetch_backend`` field.
        "fetch_backend": "playwright_chromium",
    }
    resp = api("POST", "api/v1/watch", json=payload)

    if resp.status_code in (200, 201):
        # A success response without a JSON object body must not crash the
        # run: the watch was created, only its id is unknown.
        try:
            watch_id = resp.json().get("uuid", "?")
        except (ValueError, AttributeError):
            watch_id = "?"
        return True, f"added (id={watch_id})"

    # Prefer the server's structured error; fall back to the raw body when it
    # is not JSON (ValueError) or not a JSON object (AttributeError).
    try:
        err = resp.json().get("error", resp.text[:100])
    except (ValueError, AttributeError):
        err = resp.text[:100]
    return False, f"FAILED: {err}"
# ── Main ──────────────────────────────────────────────────────────────────────
def main():
    """Add every entry of ``WATCHES`` that the server is not already watching.

    Probes the API first and exits with status 1 if the server cannot be
    reached, rejects the credentials (401) or answers with any other non-200
    status. Then each watch is either skipped (URL already present) or
    created, with one status line per entry and a summary at the end.
    """
    print(f"Connecting to {CD_URL}...")
    try:
        resp = api("GET", "api/v1/watch")
    except requests.RequestException as exc:
        # DNS/TLS/timeout problems: give a readable message, not a traceback.
        print(f"Connection failed: {exc}")
        sys.exit(1)

    if resp.status_code == 401:
        print("Auth failed -- check CD_PASS in this script")
        sys.exit(1)
    elif resp.status_code != 200:
        print(f"Connection failed: {resp.status_code}")
        sys.exit(1)

    existing_urls = get_existing_urls()
    print(f"Found {len(existing_urls)} existing watches\n")

    results = {"ok": [], "skip": [], "fail": []}
    for title, url, interval, tag in WATCHES:
        if url in existing_urls:
            print(f" ⏭️ [{tag}] {title}: already watching")
            results["skip"].append(title)
            continue

        # A network error on one watch is recorded as a failure so the
        # remaining watches are still attempted.
        try:
            ok, msg = add_watch(title, url, interval, tag)
        except requests.RequestException as exc:
            ok, msg = False, f"FAILED: {exc}"

        # Both branches used to be empty strings, so success and failure
        # lines looked identical.
        status = "✅" if ok else "❌"
        print(f" {status} [{tag}] {title}: {msg}")
        if ok:
            results["ok"].append(title)
        else:
            results["fail"].append((title, msg))

    print(f"\n{'='*50}")
    print(f"Done. {len(results['ok'])} added, {len(results['skip'])} skipped, {len(results['fail'])} failed.")
    print("\nChangeDetection will run first checks within the next refresh cycle.")
    print("To force immediate check: Dashboard -> select all -> Recheck")


if __name__ == "__main__":
    main()