#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Remplit TOUTES les colonnes DATA du fichier variables (jamais les colonnes texte).
Climat : grilles Explore2 (Météo-France) — actuel=RWL20, +10 ans=(RWL20+RWL27)/2, +20 ans=RWL27,
pic=TXx niveau de retour 5 ans (RWL20). Ville hors maille exacte → moyenne des mailles voisines."""
import openpyxl, netCDF4, numpy as np, json, re, time, unicodedata, urllib.request, urllib.parse

CLIM="/opt/projects/covalba-refonte-assets/climat"
XLSX="public/documents/covalba-zones-seo-DEMO.xlsx"
TSV="docs/local-seo-pages-source.tsv"

# --- grilles ---
def load(path,var):
    ds=netCDF4.Dataset(path); return ds.variables['lat'][:],ds.variables['lon'][:],ds.variables[var][0]
G={}
for lvl,d in [("20","rwl20"),("27","rwl27"),("40","rwl40")]:
    G[("tx30",lvl)]=load(f"{CLIM}/{d}/TX30D_yr_RWL-{lvl}_TIMEavg_GEOxy_FR-Metro_EXPLORE2-2022_MF-ADAMONT_rcp85_ENSq50.nc","TX30D")
    G[("txm",lvl)] =load(f"{CLIM}/{d}/TXm_seas-JJA_RWL-{lvl}_TIMEavg_GEOxy_FR-Metro_EXPLORE2-2022_MF-ADAMONT_rcp85_ENSq50.nc","TXm")
    G[("txx",lvl)] =load(f"{CLIM}/{d}/TXx_lr5yr_RWL-{lvl}_TIMEavg_GEOxy_FR-Metro_EXPLORE2-2022_MF-ADAMONT_rcp85_ENSq50.nc","TXx")

def at(key,plat,plon,navg=4):
    lat,lon,v=G[key]
    d=(lat-plat)**2+(lon-plon)**2
    d=np.where(np.ma.getmaskarray(v),1e9,d)
    idx=np.argsort(d.ravel())[:navg]            # moyenne des mailles les plus proches
    vals=[v.ravel()[i] for i in idx if d.ravel()[i]<1e8]
    return float(np.mean(vals)) if vals else None

def climate(plat,plon):
    tx30_20,tx30_27=at(("tx30","20"),plat,plon),at(("tx30","27"),plat,plon)
    txm_20,txm_27 =at(("txm","20"),plat,plon)-273.15,at(("txm","27"),plat,plon)-273.15
    txx_20=at(("txx","20"),plat,plon)-273.15
    return dict(
        joursSup30C=round(tx30_20), tempMoyEstivale=round(txm_20,1), picHistorique=round(txx_20,1),
        joursSup30C10ans=round((tx30_20+tx30_27)/2), tempMoyEstivale10ans=round((txm_20+txm_27)/2,1),
        joursSup30C20ans=round(tx30_27), tempMoyEstivale20ans=round(txm_27,1))

# --- zones climatiques réglementaires (arrêté, par n° dept) ---
H2={"04","07","09","12","16","17","18","22","24","26","29","31","32","33","35","36","37","40","41","44","46","47","48","49","50","53","56","64","65","72","79","81","82","84","85","86"}
H3={"06","11","13","2A","2B","30","34","66","83"}
def hzone(num): return "H3" if num in H3 else ("H2" if num in H2 else "H1")

REGION_SLUG={"Île-de-France":"ile-de-france","Hauts-de-France":"hauts-de-france","Grand Est":"grand-est",
 "Normandie":"normandie","Bretagne":"bretagne","Pays de la Loire":"pays-de-la-loire",
 "Centre-Val de Loire":"centre-val-de-loire","Bourgogne-Franche-Comté":"bourgogne-franche-comte",
 "Nouvelle-Aquitaine":"nouvelle-aquitaine","Occitanie":"occitanie","Auvergne-Rhône-Alpes":"auvergne-rhone-alpes",
 "PACA":"provence-alpes-cote-d-azur","Corse":"corse"}
REGION_NAME={"PACA":"Provence-Alpes-Côte d'Azur"}

# --- géocodage (geo.api.gouv.fr), avec alias pour les zones non-communes ---
ALIAS={"Roissy":"Roissy-en-France","Garonor":"Aulnay-sous-Bois","La Plaine":"Saint-Denis",
 "Courtabœuf":"Villebon-sur-Yvette","Couloir de la chimie":"Feyzin","Rueil":"Rueil-Malmaison",
 "Flins":"Flins-sur-Seine","Bonneuil":"Bonneuil-sur-Marne","La Défense":"Puteaux"}
def geocode(nom,deptnum):
    q=ALIAS.get(nom,nom)
    url=f"https://geo.api.gouv.fr/communes?nom={urllib.parse.quote(q)}&fields=centre&codeDepartement={deptnum}&limit=1&boost=population"
    try:
        r=json.load(urllib.request.urlopen(url,timeout=10))
        if r: c=r[0]["centre"]["coordinates"]; return c[1],c[0]
        # retry sans filtre dept
        url=f"https://geo.api.gouv.fr/communes?nom={urllib.parse.quote(q)}&fields=centre&limit=1&boost=population"
        r=json.load(urllib.request.urlopen(url,timeout=10))
        if r: c=r[0]["centre"]["coordinates"]; return c[1],c[0]
    except Exception as e: pass
    return None

# --- lecture TSV ---
rows=[l.split('\t') for l in open(TSV,encoding='utf-8').read().splitlines()[1:] if l.strip()]
cities=[dict(region=r[0],dept=r[1],ville=r[2],typeZone=r[3],slugV=r[4],slugD=r[5]) for r in rows]
for c in cities:
    m=re.search(r"\(([0-9AB]+)\)",c["dept"]); c["deptNum"]=m.group(1) if m else ""
    c["deptNom"]=re.sub(r"\s*\([0-9AB]+\)","",c["dept"]).strip()

# géocode + climat
fails=[]
for i,c in enumerate(cities):
    g=geocode(c["ville"],c["deptNum"])
    if g: c["lat"],c["lon"]=g; c["clim"]=climate(*g)
    else: fails.append(c["ville"]); c["clim"]=None
    if i%25==0: print(f"  geocode {i}/{len(cities)}"); time.sleep(0.05)
print("échecs géocodage:",fails or "aucun")
# fallback : moyenne du département
from collections import defaultdict
bydept=defaultdict(list); [bydept[c["slugD"]].append(c) for c in cities if c["clim"]]
for c in cities:
    if not c["clim"]:
        sib=bydept.get(c["slugD"])
        if sib:
            keys=sib[0]["clim"].keys()
            c["clim"]={k:round(float(np.mean([s["clim"][k] for s in sib])),1) for k in keys}
            c["clim"]={k:(round(v) if "jours" in k else v) for k,v in c["clim"].items()}

# agrégats dept + région
def agg(lst):
    keys=lst[0]["clim"].keys()
    out={k:float(np.mean([x["clim"][k] for x in lst if x["clim"]])) for k in keys}
    return {k:(round(v) if "jours" in k else round(v,1)) for k,v in out.items()}
depts={}; regions=defaultdict(list)
for c in cities:
    depts.setdefault(c["slugD"],{"deptNom":c["deptNom"],"deptNum":c["deptNum"],"region":c["region"],"cities":[]})
    depts[c["slugD"]]["cities"].append(c)
for sd,d in depts.items():
    d["clim"]=agg(d["cities"]); d["typeZone"]=max(set(x["typeZone"] for x in d["cities"]),key=[x["typeZone"] for x in d["cities"]].count)
    regions[d["region"]].append(d)

# --- écriture Excel ---
wb=openpyxl.load_workbook(XLSX)
zs=wb["Zones"]; hdr=[c.value for c in zs[1]]
if "pays" not in hdr:
    zs.cell(row=1,column=len(hdr)+1,value="pays"); hdr.append("pays")
COL={h:i+1 for i,h in enumerate(hdr)}
existing={zs.cell(row=r,column=COL["slugVille"]).value or zs.cell(row=r,column=COL["slugDepartement"]).value or zs.cell(row=r,column=COL["slugRegion"]).value
          for r in range(2,zs.max_row+1)}
# pays=FR sur les lignes existantes
for r in range(2,zs.max_row+1):
    if not zs.cell(row=r,column=COL["pays"]).value: zs.cell(row=r,column=COL["pays"],value="FR")

def put(d):  # n'écrit QUE les colonnes data
    r=zs.max_row+1
    for k,v in d.items():
        if k in COL and v is not None: zs.cell(row=r,column=COL[k],value=v)

n_v=n_d=n_r=0
for c in cities:
    if c["slugV"] in existing: continue
    proches=[x["ville"] for x in depts[c["slugD"]]["cities"] if x["ville"]!=c["ville"]][:4]
    d=dict(pageType="ville",ville=c["ville"],departementNom=c["deptNom"],departementNum=c["deptNum"],
        region=REGION_NAME.get(c["region"],c["region"]),typeZone=c["typeZone"],zoneClimatique=hzone(c["deptNum"]),
        slugVille=c["slugV"],slugDepartement=c["slugD"],slugRegion="cool-roof-"+REGION_SLUG[c["region"]],
        villesProches=", ".join(proches),pays="FR",
        reductionSousToiture={"H1":10,"H2":11,"H3":12}[hzone(c["deptNum"])],**(c["clim"] or {}))
    put(d); n_v+=1
for sd,d in depts.items():
    if sd in existing: continue
    put(dict(pageType="departement",departementNom=d["deptNom"],departementNum=d["deptNum"],
        region=REGION_NAME.get(d["region"],d["region"]),typeZone=d["typeZone"],zoneClimatique=hzone(d["deptNum"]),
        slugDepartement=sd,slugRegion="cool-roof-"+REGION_SLUG[d["region"]],pays="FR",
        reductionSousToiture={"H1":10,"H2":11,"H3":12}[hzone(d["deptNum"])],**d["clim"])); n_d+=1
for reg,ds in regions.items():
    slug="cool-roof-"+REGION_SLUG[reg]
    if slug in existing: continue
    clim=agg([{"clim":d["clim"]} for d in ds])
    put(dict(pageType="region",region=REGION_NAME.get(reg,reg),typeZone="",zoneClimatique="",
        slugRegion=slug,slugDepartement="",pays="FR",reductionSousToiture=10,**clim)); n_r+=1

# Suisse / Belgique — identité seule (hors grille Explore2 France)
CH=[("Genève","cool-roof-geneve","Tertiaire / Industriel"),("Lausanne","cool-roof-lausanne","Tertiaire / Industriel"),
 ("Sion","cool-roof-sion","Zone d'activités"),("Fribourg","cool-roof-fribourg","Industriel / Agroalimentaire"),
 ("Neuchâtel","cool-roof-neuchatel","Horlogerie / Microtechnique"),("La Chaux-de-Fonds","cool-roof-la-chaux-de-fonds","Horlogerie"),
 ("Yverdon-les-Bains","cool-roof-yverdon-les-bains","Technopôle"),("Vevey","cool-roof-vevey","Agroalimentaire")]
BE=[("Bruxelles","cool-roof-bruxelles","Logistique / Tertiaire"),("Liège","cool-roof-liege","Industriel / Portuaire"),
 ("Charleroi","cool-roof-charleroi","Industriel / Aéroportuaire"),("Namur","cool-roof-namur","Tertiaire"),
 ("Mons","cool-roof-mons","Logistique"),("La Louvière","cool-roof-la-louviere","Industriel"),
 ("Tournai","cool-roof-tournai","Logistique"),("Verviers","cool-roof-verviers","Industriel")]
n_x=0
for pays,reg,slugR,lst in [("CH","Suisse","cool-roof-suisse",CH),("BE","Belgique","cool-roof-belgique",BE)]:
    if slugR not in existing and pays=="BE":
        put(dict(pageType="region",region=reg,slugRegion=slugR,slugDepartement="",pays=pays)); n_x+=1
    for nom,slug,tz in lst:
        if slug in existing: continue
        proches=[n for n,_,_ in lst if n!=nom][:4]
        put(dict(pageType="ville",ville=nom,departementNom=reg,departementNum="",region=reg,typeZone=tz,
            slugVille=slug,slugDepartement=slugR,slugRegion=slugR,villesProches=", ".join(proches),pays=pays)); n_x+=1

# onglet Villes departement : TOUTES les villes du TSV + CH/BE
vt=wb["Villes departement"]
have={(r[0].value,r[2].value) for r in vt.iter_rows(min_row=2) if r[0].value}
for c in cities:
    if (c["slugD"],c["slugV"]) not in have: vt.append([c["slugD"],c["ville"],c["slugV"],c["typeZone"]])
for slugR,lst in [("cool-roof-suisse",CH),("cool-roof-belgique",BE)]:
    for nom,slug,tz in lst:
        if (slugR,slug) not in have: vt.append([slugR,nom,slug,tz])

wb.save(XLSX)
print(f"villes +{n_v} | départements +{n_d} | régions +{n_r} | CH/BE +{n_x}")
print("total lignes Zones:", zs.max_row-1, "| Villes departement:", vt.max_row-1)
