#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Géocode les villes du TSV et persiste slug -> {lat,lon} dans scripts/geocode-cache.json."""
import http.client
import json
import os
import re
import time
import urllib.parse
import urllib.request

# Input: tab-separated source file whose rows describe the pages to geocode.
TSV = "docs/local-seo-pages-source.tsv"

# Output: JSON cache mapping each page slug to its {"lat", "lon"} coordinates.
OUT = "scripts/geocode-cache.json"

# Place name as written in the TSV -> commune name to send to the geocoding
# API instead. Names missing from this table are queried unchanged; a few
# entries map a name to itself.
ALIAS = {
    "Roissy": "Roissy-en-France",
    "Garonor": "Aulnay-sous-Bois",
    "La Plaine": "Saint-Denis",
    "Courtabœuf": "Villebon-sur-Yvette",
    "Couloir de la chimie": "Feyzin",
    "Rueil": "Rueil-Malmaison",
    "Flins": "Flins-sur-Seine",
    "Bonneuil": "Bonneuil-sur-Marne",
    "La Défense": "Puteaux",
    "Cergy-Pontoise": "Cergy",
    "Saint-Quentin-en-Yvelines": "Montigny-le-Bretonneux",
    "Marne-la-Vallée": "Champs-sur-Marne",
    "Marcoule": "Bagnols-sur-Cèze",
    "Sophia Antipolis": "Valbonne",
    "Saint-Charles": "Perpignan",
    "Lacq": "Mourenx",
    "Marignane": "Marignane",
    "Sénart": "Lieusaint",
    "Senart": "Lieusaint",
    "Saint-Quentin": "Saint-Quentin",
    "Venette": "Venette",
    "Pompey": "Pompey",
}

def _load_cache(path):
    """Return the JSON cache stored at *path*, or an empty dict if the file is absent."""
    if not os.path.exists(path):
        return {}
    with open(path, encoding="utf-8") as fh:
        return json.load(fh)


def _geocode(name, dept_code):
    """Look up the centre of commune *name* on geo.api.gouv.fr.

    The lookup is first restricted to department *dept_code* (skipped when the
    code is empty), then retried without the department filter.

    Returns:
        ``{"lat": ..., "lon": ...}`` taken from the first result, or ``None``
        when every attempt fails or returns no result.
    """
    base = "https://geo.api.gouv.fr/communes"
    quoted = urllib.parse.quote(name)
    urls = []
    if dept_code:
        # An empty codeDepartement filter cannot narrow anything; go straight
        # to the unfiltered query in that case.
        urls.append(f"{base}?nom={quoted}&fields=centre&codeDepartement={dept_code}&limit=1&boost=population")
    urls.append(f"{base}?nom={quoted}&fields=centre&limit=1&boost=population")

    for url in urls:
        try:
            with urllib.request.urlopen(url, timeout=10) as resp:
                res = json.load(resp)
            if res:
                c = res[0]["centre"]["coordinates"]
                # The code reads the pair as [lon, lat].
                return {"lat": c[1], "lon": c[0]}
        except (OSError, http.client.HTTPException, ValueError, LookupError, TypeError):
            # Best effort: network/HTTP errors, invalid JSON or an unexpected
            # payload shape just mean "try the next URL". Anything else is a
            # programming error and is allowed to propagate.
            continue
    return None


# --- Script body: geocode every TSV row whose slug is not cached yet. ---

cache = _load_cache(OUT)

# Skip the header line and blank lines; each remaining line becomes a list of columns.
with open(TSV, encoding="utf-8") as fh:
    rows = [line.split("\t") for line in fh.read().splitlines()[1:] if line.strip()]

fails = []
dept_re = re.compile(r"\(([0-9AB]+)\)")  # code in parentheses: digits, 'A' or 'B'

try:
    for i, r in enumerate(rows):
        # Column 1 holds the department text, column 2 the place name and
        # column 4 the slug used as the cache key.
        ville, dept, slug = r[2], r[1], r[4]
        if slug in cache:
            continue
        m = dept_re.search(dept)
        num = m.group(1) if m else ""
        coord = _geocode(ALIAS.get(ville, ville), num)
        if coord:
            cache[slug] = coord
        else:
            fails.append(ville)
        # Short pause after every 30th row index that needed a lookup.
        if i % 30 == 0:
            time.sleep(0.05)
finally:
    # Persist whatever was resolved even if the loop is interrupted or crashes,
    # so completed lookups are not repeated on the next run.
    with open(OUT, "w", encoding="utf-8") as fh:
        json.dump(cache, fh, ensure_ascii=False, indent=0)

print(f"géocodées: {len(cache)}/{len(rows)} | échecs: {fails or 'aucun'}")
