#!/usr/bin/env python3
"""
Geocode škole iz Excel-a koristeći Mapbox API.
Pokrenuti JEDNOM na serveru. Rezultat se sprema u schools_geocoded.json
koji se zatim koristi u HTML-u mape (0 API poziva pri učitavanju stranice).

Zahtjevi:
    pip install pandas openpyxl requests

Pokretanje:
    python3 geocode_schools.py
"""

import json, time, sys
import pandas as pd
import requests

MAPBOX_TOKEN = "pk.eyJ1IjoidHJhZmZpYzI0c2F0YSIsImEiOiJjbG02MzExcG8yajRsM2pwNjhiOWw4YTF5In0.xt5_tl8a0MML-Lrok7c9XQ"
EXCEL_FILE   = "skole_bodovi.xlsx"
OUTPUT_JSON  = "schools_geocoded.json"
CACHE_FILE   = "geocode_cache.json"  # sprema napredak – sigurno od prekida

# ── učitaj Excel ──────────────────────────────────────────
df = pd.read_excel(EXCEL_FILE, sheet_name="Data")

schools_dict = {}
for _, row in df.iterrows():
    key = (str(row["Škola"]), str(row["Adresa"]))
    if key not in schools_dict:
        schools_dict[key] = {
            "skola":    str(row["Škola"]),
            "adresa":   str(row["Adresa"]),
            "grad":     str(row["Grad"]),
            "pbr":      str(row["Poštanski broj"]),
            "zupanija": str(row["Županija"]),
            "telefon":  str(row["Telefon"]) if pd.notna(row["Telefon"]) else "",
            "mail":     str(row["Mail"])    if pd.notna(row["Mail"])    else "",
            "web":      str(row["Web"])     if pd.notna(row["Web"])     else "",
            "programi": [],
            "lat": None, "lng": None,
        }
    schools_dict[key]["programi"].append({
        "naziv":      str(row["Program"]),
        "min_bodova": round(float(row["Broj bodova zadnjeg"]), 2) if pd.notna(row["Broj bodova zadnjeg"]) else None,
        "max_bodova": round(float(row["Broj bodova prvog"]),   2) if pd.notna(row["Broj bodova prvog"])   else None,
        "avg_bodova": round(float(row["Prosječni broj bodova"]),2) if pd.notna(row["Prosječni broj bodova"]) else None,
    })

schools = list(schools_dict.values())
for s in schools:
    mins = [p["min_bodova"] for p in s["programi"] if p["min_bodova"] is not None]
    maxs = [p["max_bodova"] for p in s["programi"] if p["max_bodova"] is not None]
    avgs = [p["avg_bodova"] for p in s["programi"] if p["avg_bodova"] is not None]
    s["min_bodova"] = round(min(mins), 2) if mins else None
    s["max_bodova"] = round(max(maxs), 2) if maxs else None
    s["avg_bodova"] = round(sum(avgs)/len(avgs), 2) if avgs else None

# ── učitaj cache (nastavak ako je skriptu prekinuto) ───────
try:
    with open(CACHE_FILE, encoding="utf-8") as f:
        cache = json.load(f)
    print(f"Cache učitan: {len(cache)} adresa")
except FileNotFoundError:
    cache = {}

# ── dedupliciraj adrese ────────────────────────────────────
unique_addrs = {}
for s in schools:
    ak = f"{s['adresa']}|{s['grad']}"
    if ak not in cache:
        unique_addrs[ak] = (s["adresa"], s["grad"])

print(f"Škola ukupno:      {len(schools)}")
print(f"Jedinstvenih adresa: {len(cache) + len(unique_addrs)}")
print(f"Za geokodirati:    {len(unique_addrs)}")

# ── geocoding ─────────────────────────────────────────────
session = requests.Session()
done = 0
failed = 0

for ak, (adresa, grad) in unique_addrs.items():
    q = f"{adresa}, {grad}, Hrvatska"
    url = "https://api.mapbox.com/geocoding/v5/mapbox.places/" + \
          requests.utils.quote(q) + \
          f".json?country=HR&limit=1&access_token={MAPBOX_TOKEN}"
    try:
        r = session.get(url, timeout=10)
        r.raise_for_status()
        feats = r.json().get("features", [])
        if feats:
            lng, lat = feats[0]["center"]
            cache[ak] = {"lat": round(lat, 6), "lng": round(lng, 6)}
        else:
            cache[ak] = None
            failed += 1
            print(f"  [NIJE NAĐENO] {q}")
    except Exception as e:
        cache[ak] = None
        failed += 1
        print(f"  [GREŠKA] {q}: {e}")

    done += 1
    if done % 50 == 0:
        # spremi cache svakih 50 radi sigurnosti
        with open(CACHE_FILE, "w", encoding="utf-8") as f:
            json.dump(cache, f, ensure_ascii=False)
        print(f"  ✓ {done}/{len(unique_addrs)} ({failed} failed)")
        sys.stdout.flush()

    time.sleep(0.05)  # 50ms pauza – ne urlaj API

# ── finalni cache save ─────────────────────────────────────
with open(CACHE_FILE, "w", encoding="utf-8") as f:
    json.dump(cache, f, ensure_ascii=False)

# ── upiši koordinate u škole ───────────────────────────────
no_coords = 0
for s in schools:
    ak = f"{s['adresa']}|{s['grad']}"
    coords = cache.get(ak)
    if coords:
        s["lat"] = coords["lat"]
        s["lng"] = coords["lng"]
    else:
        no_coords += 1

print(f"\nGotovo!")
print(f"  Geokodiranih škola: {len(schools) - no_coords}/{len(schools)}")
print(f"  Bez koordinata:     {no_coords}")

with open(OUTPUT_JSON, "w", encoding="utf-8") as f:
    json.dump(schools, f, ensure_ascii=False)

print(f"\n→ Rezultat spreman u: {OUTPUT_JSON}")
print(f"→ Sada pokreni: python3 inject_data.py")