import json,os,re,html as H,markdown,unicodedata,difflib
from collections import Counter
from google.oauth2.credentials import Credentials
from google.auth.transport.requests import Request
from googleapiclient.discovery import build as gbuild
from googleapiclient.http import MediaInMemoryUpload

# Articles to process: (slug, path of the raw HTML file handed to parse_fr).
# The slug is also used to locate the Markdown sources and the TSV entry.
ART = [
    ("type-de-cheveux", "/tmp/raw_type-de-cheveux.html"),
    ("acide-hyaluronique-cheveux", "/tmp/raw_acide-hyaluronique-cheveux.html"),
    ("pp405", "/tmp/raw_pp405-molecule-reveil-follicules.html"),
    ("bouton-dans-le-cuir-chevelu", "/tmp/raw_bouton-dans-le-cuir-chevelu.html"),
    ("gros-front", "/tmp/raw_gros-front.html"),
    ("greffe-de-cheveux-ratee", "/tmp/raw_greffe-de-cheveux-ratee.html"),
]
def norm(s):
    """Return a lowercase, ASCII-only comparison key for *s*.

    The text is NFKD-decomposed so accented letters lose their diacritics,
    characters that still cannot be encoded as ASCII are dropped, and every
    remaining character other than ``a-z``, ``0-9`` or a space is replaced by
    a single space. Runs of spaces are NOT collapsed and the result is not
    stripped.
    """
    decomposed = unicodedata.normalize("NFKD", s)
    ascii_only = decomposed.encode("ascii", "ignore").decode()
    return re.sub(r'[^a-z0-9 ]', ' ', ascii_only.lower())
def heads_of(path):
    """Return the titles of the ``##`` and ``###`` headings of a Markdown file.

    Args:
        path: Path of a UTF-8 encoded Markdown file.

    Returns:
        The heading texts (marker removed, surrounding whitespace stripped)
        in file order. Only lines that start with exactly two or three ``#``
        followed by whitespace are considered.
    """
    heading = re.compile(r'^(#{2,3})\s+(.*)')
    out = []
    # The context manager closes the file; the original left the handle open.
    with open(path, encoding="utf-8") as md_file:
        for line in md_file:
            m = heading.match(line)
            if m:
                out.append(m.group(2).strip())
    return out
def src(slug,lang):
    """Return the path of the translated Markdown for *slug* in *lang*.

    ``translations/`` is checked first, then ``articles/translations/``
    (both relative to the current working directory). Returns ``None`` when
    neither file exists.
    """
    candidates = (
        f"translations/{slug}-{lang}.md",
        f"articles/translations/{slug}-{lang}.md",
    )
    return next((p for p in candidates if os.path.exists(p)), None)
def frmd(slug):
    """Return the path of the Markdown source for *slug*.

    ``articles/<slug>.md`` is checked first, then
    ``articles/translations/<slug>.md`` (both relative to the current working
    directory). Returns ``None`` when neither file exists.
    """
    candidates = (
        f"articles/{slug}.md",
        f"articles/translations/{slug}.md",
    )
    return next((p for p in candidates if os.path.exists(p)), None)

def parse_fr(path):
    """Extract the numbered article images from a raw HTML file.

    Upload URLs of the form
    ``/wp-content/uploads/YYYY/MM/NN[XX]-TOKEN[-WxH].ext`` are scanned; the
    most frequent ``TOKEN`` (compared upper-cased) is taken as the article's
    image token. The ``<h2>``/``<h3>`` and ``<img>`` tags are then walked in
    document order so each image can be associated with the heading that
    precedes it.

    Args:
        path: Path of a UTF-8 encoded HTML file.

    Returns:
        ``(tok, imgs)`` where ``tok`` is the upper-cased token and ``imgs`` is
        a list of dicts in document order with keys:

        * ``"num"``  - the two-digit image number (first occurrence wins),
        * ``"head"`` - text of the closest preceding h2/h3, tags removed and
          entities unescaped, or ``None`` when the image precedes any heading,
        * ``"url"``  - absolute URL rebuilt on ``https://emrahcinik.com``; a
          trailing extra ``.webp`` is dropped, a ``-WxH`` size suffix is kept.

    Raises:
        ValueError: if the file contains no matching upload URL (the original
            failed here with a bare ``IndexError``).
    """
    # Close the file deterministically instead of leaking the handle.
    with open(path, encoding="utf-8") as html_file:
        raw = html_file.read()

    toks = re.findall(r'/wp-content/uploads/\d{4}/\d{2}/\d{2}[A-Z]{0,2}-([A-Z0-9-]+?)(?:-\d+x\d+)?\.(?:jpg|jpeg|png|webp)', raw, re.I)
    if not toks:
        raise ValueError(f"no numbered /wp-content/uploads/ image found in {path!r}")
    tok = Counter(t.upper() for t in toks).most_common(1)[0][0]

    # Only images carrying the dominant token belong to the article body.
    pat = re.compile(r'/wp-content/uploads/(\d{4}/\d{2})/(\d{2}[A-Z]{0,2}-'+re.escape(tok)+r'(?:-\d+x\d+)?\.(?:jpg|jpeg|png|webp))(?:\.webp)?', re.I)

    last = None
    seen = set()
    imgs = []
    for m in re.finditer(r'<h([23])\b[^>]*>(.*?)</h\1>|<img\b[^>]*>', raw, re.S | re.I):
        tag = m.group(0)
        if tag[:2].lower() == "<h":
            # Remember the latest heading as plain text.
            last = H.unescape(re.sub(r'<[^>]+>', '', m.group(2))).strip()
            continue
        mm = pat.search(tag)
        if not mm:
            continue
        num = re.match(r'(\d{2})', mm.group(2)).group(1)
        if num in seen:
            # The same image number may appear several times; keep the first.
            continue
        seen.add(num)
        imgs.append({
            "num": num,
            "head": last,
            "url": "https://emrahcinik.com/wp-content/uploads/" + mm.group(1) + "/" + mm.group(2),
        })
    return tok, imgs
def match_idx(fr_head, frmd_heads):
    """Return the index in *frmd_heads* of the heading matching *fr_head*.

    Both sides are compared through ``norm``. Candidates are tried in
    decreasing order of confidence:

    1. exact equality of the normalised strings,
    2. one normalised string containing the other (first such candidate),
    3. the best ``difflib`` close match with a cutoff of 0.6.

    Args:
        fr_head: Heading text, or ``None`` when there is no heading.
        frmd_heads: Sequence of heading texts to search.

    Returns:
        The index of the matching heading, or ``-1`` when *fr_head* is
        ``None`` or nothing matches.
    """
    if fr_head is None:
        return -1
    fn = norm(fr_head)
    cands = [norm(h) for h in frmd_heads]

    # An exact match must win over an earlier heading that merely contains
    # (or is contained in) the wanted text.
    if fn in cands:
        return cands.index(fn)

    # A blank key is a substring of everything and fuzzy-matches very short
    # candidates, so it would be attached to an arbitrary heading.
    if not fn.strip():
        return -1

    for i, c in enumerate(cands):
        # Skip blank candidates for the same reason ("" in fn is always True).
        if c.strip() and (c in fn or fn in c):
            return i

    close = difflib.get_close_matches(fn, cands, n=1, cutoff=0.6)
    return cands.index(close[0]) if close else -1
def lang_url(u,lang):
    """Return *u* with each ``/NNFR-`` marker rewritten for *lang*.

    ``NN`` is a two-digit number that is kept; ``FR`` (matched
    case-sensitively) is replaced by the upper-cased *lang*. URLs without
    such a marker are returned unchanged.
    """
    replacement = rf'/\1{lang.upper()}-'
    return re.sub(r'/(\d{2})FR-', replacement, u)
def fig(u,alt):
    """Return a ``<figure>`` wrapping an ``<img>`` for URL *u*.

    *alt* is HTML-escaped (quotes included) before being used as the ``alt``
    attribute; *u* is inserted as-is.
    """
    template = '<figure><img src="{}" alt="{}" /></figure>'
    return template.format(u, H.escape(alt))

def build(slug,lang,imgs,frmd_heads):
    """Render a translated article to HTML with the images inserted.

    The translated Markdown located by ``src(slug, lang)`` is split at its
    first ``# `` line: that line is the title, everything after it is the
    body, which is converted with ``markdown`` (``extra`` and ``sane_lists``
    extensions). Images are then placed as ``<figure>`` elements:

    * the image numbered ``"01"`` (the hero) comes first, with the title as
      its alt text;
    * images whose heading cannot be matched in *frmd_heads* follow, ordered
      by number, also with the title as alt text;
    * every other image is inserted right after the N-th h2/h3 of the
      rendered HTML, N being the index returned by ``match_idx`` for its
      heading; its alt text is that rendered heading's text.

    This relies on the translation having its h2/h3 headings in the same
    order as *frmd_heads*.

    Args:
        slug: Article slug.
        lang: Language code; also used to rewrite the image URLs.
        imgs: Image dicts with ``"num"``, ``"head"`` and ``"url"`` keys.
        frmd_heads: Reference heading texts used to index the placements.

    Returns:
        ``(html, title)``.

    Raises:
        FileNotFoundError: if no translation file exists for *slug*/*lang*.
        ValueError: if the translation has no ``# `` title line.
    """
    path = src(slug, lang)
    if path is None:
        # open(None) would otherwise fail with an unhelpful TypeError.
        raise FileNotFoundError(f"no {lang!r} translation found for {slug!r}")
    with open(path, encoding="utf-8") as md_file:
        md = md_file.read().split("\n")

    i = next((k for k, line in enumerate(md) if line.startswith("# ")), None)
    if i is None:
        raise ValueError(f"{path}: no '# ' title line")
    title = md[i][2:].strip()
    body = "\n".join(md[i+1:]).strip()
    rendered = markdown.markdown(body, extensions=["extra","sane_lists"])
    # The capturing group keeps the heading elements as their own segments.
    parts = re.split(r'(<h[23][^>]*>.*?</h[23]>)', rendered, flags=re.S)

    # placements: target heading index -> [imgs]
    place = {}
    topextra = []
    hero = next((im for im in imgs if im["num"] == "01"), None)
    for im in imgs:
        if im is hero:
            continue
        j = match_idx(im["head"], frmd_heads)
        if j == -1:
            topextra.append(im)
        else:
            place.setdefault(j, []).append(im)

    out = []
    if hero:
        out.append(fig(lang_url(hero["url"], lang), title))
    for im in sorted(topextra, key=lambda x: x["num"]):
        out.append(fig(lang_url(im["url"], lang), title))

    hidx = -1
    for seg in parts:
        out.append(seg)
        if re.match(r'<h[23]', seg.strip()):
            hidx += 1
            cur = H.unescape(re.sub(r'<[^>]+>', '', seg)).strip()
            for im in sorted(place.get(hidx, []), key=lambda x: x["num"]):
                out.append(fig(lang_url(im["url"], lang), cur))
    # Drop whitespace-only segments left over by the split.
    return "\n".join(s for s in out if s.strip()), title

# (column 1, column 2) -> column 3 of each tab-separated row; looked up below
# as tsv[(slug, lang)] to obtain the document URL. Rows with fewer than three
# columns are ignored.
tsv={}
with open("_translations_urls.tsv", encoding="utf-8") as tsv_file:
    for line in tsv_file:
        cols = line.rstrip("\n").split("\t")
        if len(cols) >= 3:
            tsv[(cols[0], cols[1])] = cols[2]
# Load the stored OAuth token, refresh it, and build the Drive v3 client.
with open(os.path.expanduser("~/.config/cinik-gdoc/token.json"), encoding="utf-8") as token_file:
    t = json.load(token_file)
cr = Credentials(
    token=t["token"],
    refresh_token=t["refresh_token"],
    token_uri=t["token_uri"],
    client_id=t["client_id"],
    client_secret=t["client_secret"],
    scopes=t["scopes"],
)
cr.refresh(Request())
drive = gbuild("drive", "v3", credentials=cr, cache_discovery=False)
# For every article: parse the raw HTML for images, report how each image maps
# onto the Markdown headings, then build, save and upload the EN and IT pages.
for slug,raw in ART:
    tok,imgs=parse_fr(raw)
    fh=heads_of(frmd(slug))
    # Image "01" is the hero and is placed at the top, so it is not counted.
    matched=sum(1 for im in imgs if im["num"]!="01" and match_idx(im["head"],fh)!=-1)
    print(f"\n### {slug}  imgs={len(imgs)} frmd_heads={len(fh)} placés(non-héro)={matched}")
    for im in imgs:
        j=match_idx(im["head"],fh)
        print(f"   {im['num']}  hFR='{(im['head'] or 'TOP')[:38]}' -> idx {j}")
    for lang in ("en","it"):
        wp,title=build(slug,lang,imgs,fh)
        # Write as UTF-8 explicitly (the inputs are read as UTF-8) and close
        # the file before uploading.
        with open(f"html_wp/{slug}-{lang}.html","w",encoding="utf-8") as out_file:
            out_file.write(wp)
        fid=re.search(r"/document/d/([^/]+)",tsv[(slug,lang)]).group(1)
        drive.files().update(fileId=fid,media_body=MediaInMemoryUpload(wp.encode("utf-8"),mimetype="text/plain",resumable=False),fields="id",supportsAllDrives=True).execute()
        # NOTE(review): this grants write access to anyone holding the link.
        # Unlike the update() call above it does not pass supportsAllDrives;
        # confirm whether that is intended for files on shared drives.
        try:
            drive.permissions().create(fileId=fid,body={"type":"anyone","role":"writer"},fields="id").execute()
        except Exception as exc:
            # Sharing stays best-effort, but the failure is no longer hidden.
            print(f"   WARN: could not share {slug}-{lang} ({fid}): {exc}")