182 lines
6.5 KiB
Python
182 lines
6.5 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
seed_miniflux.py -- populates Miniflux with Indian renewable-energy (RE) and finance RSS feeds.
|
|
Uses Miniflux API with basic auth.
|
|
Run: python3 seed_miniflux.py
|
|
|
|
Feed categories:
|
|
- Regulatory & Policy
|
|
- Industry & Analysis
|
|
- Markets
|
|
- Macro & Economy
|
|
"""
|
|
|
|
import json
import os
import sys

import requests
|
|
|
|
# ── Config ────────────────────────────────────────────────────────────────────
|
|
# Connection settings. Each value can be overridden through an environment
# variable of the same name so that credentials do not have to live in this
# file; the literals are kept only as backward-compatible fallbacks.
# NOTE(review): the fallback password is committed in plain text -- rotate it
# and drop the literal once MINIFLUX_PASS is supplied via the environment.
# The trailing slash is stripped because the request URL is built as
# f"{MINIFLUX_URL}/v1{path}".
MINIFLUX_URL = os.environ.get("MINIFLUX_URL", "https://feeds.manohargupta.com").rstrip("/")
MINIFLUX_USER = os.environ.get("MINIFLUX_USER", "manohar")
MINIFLUX_PASS = os.environ.get("MINIFLUX_PASS", "QwEzVucn8Mm+ONROWPDGpA==")
|
|
|
|
# ── Feed definitions ──────────────────────────────────────────────────────────
|
|
# Format: (title, url, category, crawler)
|
|
# crawler=True for JS-heavy sites that need full-page fetch
|
|
# Feeds grouped by category; each entry is (title, url, crawler).
# Insertion order of the mapping and of each list defines the order of FEEDS.
_FEEDS_BY_CATEGORY = {
    "Regulatory & Policy": [
        ("PIB - New & Renewable Energy",
         "https://pib.gov.in/RssMain.aspx?ModId=6&Lang=1&Regid=3", False),
        ("PIB - Power Ministry",
         "https://pib.gov.in/RssMain.aspx?ModId=7&Lang=1&Regid=3", False),
    ],
    "Industry & Analysis": [
        ("Mercom India", "https://mercomindia.com/feed/", False),
        ("IEEFA", "https://ieefa.org/feed/", False),
        ("PV Magazine India", "https://www.pv-magazine-india.com/feed/", False),
        ("CleanTechnica", "https://cleantechnica.com/feed/", False),
        ("Bridge to India", "https://bridgetoindia.com/feed/", False),
    ],
    "Markets": [
        ("Economic Times - Energy",
         "https://energy.economictimes.indiatimes.com/rss/topstories", False),
        ("Moneycontrol - Latest News",
         "https://www.moneycontrol.com/rss/latestnews.xml", False),
        ("Livemint - Companies", "https://www.livemint.com/rss/companies", False),
    ],
    "Macro & Economy": [
        ("RBI Press Releases", "https://rbi.org.in/Scripts/RSS.aspx?Id=15", False),
        ("IMF - South Asia", "https://www.imf.org/en/News/rss?language=eng", False),
        ("CEEW", "https://www.ceew.in/rss.xml", False),
    ],
}

# Flattened view consumed by the rest of the script:
# a list of (title, url, category, crawler) tuples.
FEEDS = [
    (feed_title, feed_url, category_name, use_crawler)
    for category_name, entries in _FEEDS_BY_CATEGORY.items()
    for feed_title, feed_url, use_crawler in entries
]
|
|
|
|
|
|
# ── API helpers ───────────────────────────────────────────────────────────────
|
|
|
|
def api(method, path, **kwargs):
    """Send an authenticated request to the Miniflux v1 API.

    Args:
        method: HTTP verb, e.g. "GET" or "POST".
        path: API path appended after "/v1", e.g. "/feeds".
        **kwargs: Extra keyword arguments forwarded to ``requests.request``
            (for example ``json=payload``). ``auth``, ``timeout`` and
            ``headers`` may be supplied to override the defaults below.

    Returns:
        The ``requests.Response`` object; the status code is not checked here.
    """
    # Defaults are applied with setdefault instead of fixed keyword arguments
    # so that a caller passing auth=/timeout= overrides them rather than
    # triggering "got multiple values for keyword argument".
    kwargs.setdefault("auth", (MINIFLUX_USER, MINIFLUX_PASS))
    kwargs.setdefault("timeout", 15)

    # Caller-supplied headers are merged on top of the JSON content type.
    headers = {"Content-Type": "application/json"}
    headers.update(kwargs.pop("headers", None) or {})

    return requests.request(
        method,
        f"{MINIFLUX_URL}/v1{path}",
        headers=headers,
        **kwargs,
    )
|
|
|
|
|
|
def get_or_create_category(name, existing):
    """Resolve the ID of the category titled *name*, creating it when absent.

    ``existing`` is an iterable of category dicts carrying "title" and "id"
    keys. The first entry whose title equals *name* wins. When there is no
    match, the category is created through ``POST /categories``.

    Returns the category ID, or None if the creation request did not
    answer with HTTP 201.
    """
    # Lazy search: stops at the first matching entry, like an early return.
    match = next((entry for entry in existing if entry["title"] == name), None)
    if match is not None:
        return match["id"]

    response = api("POST", "/categories", json={"title": name})
    if response.status_code != 201:
        print(f" [ERROR] Could not create category {name}: {response.text}")
        return None

    new_id = response.json()["id"]
    print(f" [CAT] Created category: {name} (id={new_id})")
    return new_id
|
|
|
|
|
|
def add_feed(title, url, category_id, crawler):
    """Subscribe Miniflux to a feed.

    Args:
        title: Human-readable feed name. It is not part of the request
            payload; the parameter is kept for interface compatibility.
        url: Feed URL to subscribe to.
        category_id: ID of the Miniflux category to file the feed under.
        crawler: Passed through as the "crawler" option (full-page fetch).

    Returns:
        A ``(success, message)`` tuple. ``success`` is True when the feed was
        created or is already subscribed, False for any other outcome.
    """
    payload = {
        "feed_url": url,
        "category_id": category_id,
        "crawler": crawler,  # fetch full page for JS-heavy sites
        "user_agent": "Mozilla/5.0 (compatible; Miniflux)",
    }
    resp = api("POST", "/feeds", json=payload)

    if resp.status_code == 201:
        feed_id = resp.json()["feed_id"]
        return True, f"added (id={feed_id})"
    if resp.status_code == 409:
        return True, "already exists"

    # Extract the server's error message; fall back to the raw body when the
    # response is not JSON (json() raises a ValueError subclass) or is not a
    # JSON object.
    try:
        body = resp.json()
    except ValueError:
        body = None
    err = body.get("error_message", resp.text) if isinstance(body, dict) else resp.text

    # NOTE(review): Miniflux's documented status codes list 400 but not 409,
    # and a duplicate feed URL appears to be rejected as a 400 validation
    # error -- confirm against the deployed server version. Treating that
    # case as success keeps re-runs of the script idempotent.
    if resp.status_code == 400 and "already exists" in str(err).lower():
        return True, "already exists"

    return False, f"FAILED: {err}"
|
|
|
|
|
|
# ── Main ──────────────────────────────────────────────────────────────────────
|
|
|
|
def main():
    """Seed Miniflux with every feed in FEEDS and print a summary.

    Verifies the credentials via ``GET /me`` (exiting with status 1 when the
    answer is not HTTP 200), resolves or creates each feed's category, adds
    the feed, and finally prints per-feed results and failure reasons.
    """
    print(f"Connecting to {MINIFLUX_URL}...")
    me = api("GET", "/me")
    if me.status_code != 200:
        print(f"Auth failed: {me.status_code} {me.text}")
        sys.exit(1)
    print(f"Authenticated as: {me.json()['username']}\n")

    # Fetch existing categories once, up front.
    cats_resp = api("GET", "/categories")
    existing_cats = cats_resp.json() if cats_resp.status_code == 200 else []

    # Category name -> ID for categories already resolved in this run.
    # This replaces re-fetching the full category list after every feed:
    # each category is looked up or created at most once on success.
    category_ids = {}
    results = {"ok": [], "fail": []}

    for title, url, category_name, crawler in FEEDS:
        cat_id = category_ids.get(category_name)
        if not cat_id:
            cat_id = get_or_create_category(category_name, existing_cats)
            if not cat_id:
                # Failures are not cached, so the next feed in this
                # category retries the creation.
                results["fail"].append((title, "no category"))
                continue
            category_ids[category_name] = cat_id

        ok, msg = add_feed(title, url, cat_id, crawler)
        status = "✅" if ok else "❌"
        print(f" {status} [{category_name}] {title}: {msg}")
        if ok:
            results["ok"].append(title)
        else:
            results["fail"].append((title, msg))

    print(f"\n{'='*50}")
    print(f"Done. {len(results['ok'])} added, {len(results['fail'])} failed.")
    if results["fail"]:
        print("\nFailed feeds (check URL or site availability):")
        for title, reason in results["fail"]:
            print(f" - {title}: {reason}")
    print("\nMiniflux will fetch all feeds in the next refresh cycle (~1 hour).")
    print("Force a refresh: Feeds → select all → Refresh")


# Run the seeding only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|