import json,os,re,html as H,markdown,glob
from collections import Counter
from google.oauth2.credentials import Credentials
from google.auth.transport.requests import Request
from googleapiclient.discovery import build as gbuild
from googleapiclient.http import MediaInMemoryUpload

# Articles to process: (slug, path of the saved HTML page handed to parse_fr).
# The slug is also used to locate the translation markdown and the tsv entry.
ART = [
    ("type-de-cheveux", "/tmp/raw_type-de-cheveux.html"),
    ("acide-hyaluronique-cheveux", "/tmp/raw_acide-hyaluronique-cheveux.html"),
    ("pp405", "/tmp/raw_pp405-molecule-reveil-follicules.html"),
    ("bouton-dans-le-cuir-chevelu", "/tmp/raw_bouton-dans-le-cuir-chevelu.html"),
    ("gros-front", "/tmp/raw_gros-front.html"),
    ("greffe-de-cheveux-ratee", "/tmp/raw_greffe-de-cheveux-ratee.html"),
]

def srcfile(slug,lang):
    """Return the path of the translation markdown for ``slug``/``lang``.

    Two relative locations are probed in order; the first one that exists on
    disk wins. Returns ``None`` when neither exists.
    """
    candidates=(
        f"translations/{slug}-{lang}.md",
        f"articles/translations/{slug}-{lang}.md",
    )
    return next((c for c in candidates if os.path.exists(c)), None)

def parse_fr(path):
    """Extract the image token, headings and image placements from a saved page.

    The file at ``path`` is read as UTF-8. Every ``/wp-content/uploads/...``
    image filename is reduced to its token (the part after the two-digit
    number and optional letter suffix); the most frequent token, upper-cased,
    selects which images are kept.

    Returns a 3-tuple ``(tok, heads, imgs)``:
      * ``tok``   - the winning token, or ``None`` if no upload image was found;
      * ``heads`` - text of each ``<h2>``/``<h3>`` in document order, tags
                    stripped and HTML entities unescaped;
      * ``imgs``  - one dict per distinct two-digit image number with keys
                    ``num``, ``after`` (0-based index of the last heading seen
                    before the image, ``-1`` if none yet) and ``url``.

    When no image is found the result is ``(None, [], [])`` so callers can
    always unpack three values.
    """
    with open(path,encoding="utf-8") as fh:
        raw=fh.read()
    toks=re.findall(r'/wp-content/uploads/\d{4}/\d{2}/\d{2}[A-Z]{0,2}-([A-Z0-9-]+?)(?:-\d+x\d+)?\.(?:jpg|jpeg|png|webp)',raw,re.I)
    if not toks:
        # Same arity as the normal return below; the previous 2-tuple broke
        # `tok,heads,imgs = parse_fr(...)` at the call site.
        return None,[],[]
    tok=Counter(t.upper() for t in toks).most_common(1)[0][0]
    # Only images whose filename carries the winning token are considered.
    pat=re.compile(r'/wp-content/uploads/(\d{4}/\d{2})/(\d{2}[A-Z]{0,2}-'+re.escape(tok)+r'(?:-\d+x\d+)?\.(?:jpg|jpeg|png|webp))(?:\.webp)?',re.I)
    hi=-1      # index of the most recent h2/h3 encountered
    seen=set() # image numbers already recorded
    imgs=[]
    heads=[]
    # Walk headings and <img> tags in document order.
    for m in re.finditer(r'<h([23])\b[^>]*>(.*?)</h\1>|<img\b[^>]*>',raw,re.S|re.I):
        s=m.group(0)
        if s[:2].lower()=="<h":
            hi+=1
            heads.append(H.unescape(re.sub(r'<[^>]+>','',m.group(2))).strip())
            continue
        mm=pat.search(s)
        if not mm:
            continue
        num=re.match(r'(\d{2})',mm.group(2)).group(1)
        if num in seen:
            # Keep only the first occurrence of each numbered image.
            continue
        seen.add(num)
        url="https://emrahcinik.com/wp-content/uploads/"+mm.group(1)+"/"+mm.group(2)
        imgs.append({"num":num,"after":hi,"url":url})
    return tok,heads,imgs

def lang_url(url,lang):
    """Swap the ``FR`` marker of a numbered image filename for ``lang``.

    Every ``/NNFR-`` segment (two digits followed by ``FR-``) becomes
    ``/NN<LANG>-`` with ``lang`` upper-cased, e.g. ``04FR-`` -> ``04EN-``.
    URLs without such a segment are returned unchanged.
    """
    marker=re.compile(r'/(\d{2})FR-')
    # Replacement template: slash, backreference to the two digits, language code, dash.
    template="/\\1"+lang.upper()+"-"
    return marker.sub(template,url)

def build(slug,lang,heads_fr,imgs):
    """Render the translated article as HTML with images re-inserted.

    The markdown file located by ``srcfile(slug, lang)`` is read; its first
    line starting with ``"# "`` gives the title and everything after it is
    converted to HTML. Images from ``imgs`` (dicts with ``num``, ``after`` and
    ``url`` keys) are then placed by position: an image with ``after == k`` is
    emitted right after the k-th ``<h2>``/``<h3>`` of the translated HTML.

    Parameters:
        slug, lang: identify the translation source file and the language
            code substituted into image URLs via ``lang_url``.
        heads_fr: accepted for interface compatibility; not used here.
        imgs: image placement records.

    Returns:
        ``(html, title)`` - the assembled HTML string and the article title.

    Raises:
        FileNotFoundError: no translation markdown exists for slug/lang.
        ValueError: the markdown has no ``# `` title line.
    """
    path=srcfile(slug,lang)
    if path is None:
        # Fail with a readable message instead of open(None) raising TypeError.
        raise FileNotFoundError(f"no translation markdown found for {slug}-{lang}")
    with open(path,encoding="utf-8") as fh:
        md=fh.read().split("\n")
    i=next((k for k,line in enumerate(md) if line.startswith("# ")),None)
    if i is None:
        raise ValueError(f"{path}: no '# ' title line found")
    title=md[i][2:].strip()
    body="\n".join(md[i+1:]).strip()
    html=markdown.markdown(body,extensions=["extra","sane_lists"])
    # The capturing group keeps each h2/h3 element as its own list item.
    parts=re.split(r'(<h[23][^>]*>.*?</h[23]>)',html,flags=re.S)

    def fig(u,alt):
        return f'<figure><img src="{u}" alt="{H.escape(alt)}" /></figure>'

    # Hero image: the first image preceding any heading, else image number 01.
    # NOTE(review): further images with after == -1 are never emitted - confirm
    # this is intended.
    hero=[im for im in imgs if im["after"]==-1] or [im for im in imgs if im["num"]=="01"]
    out=[]
    if hero:
        out.append(fig(lang_url(hero[0]["url"],lang), title))
    hidx=-1
    bynum_used={h["num"] for h in hero[:1]}  # image numbers already placed
    for seg in parts:
        out.append(seg)
        if not re.match(r'<h[23]',seg.strip()):
            continue
        hidx+=1
        # The heading's plain text doubles as alt text for images placed under it.
        cur_head=H.unescape(re.sub(r'<[^>]+>','',seg)).strip()
        for im in imgs:
            if im["after"]==hidx and im["num"] not in bynum_used:
                bynum_used.add(im["num"])
                out.append(fig(lang_url(im["url"],lang), cur_head))
    # Drop whitespace-only fragments produced by the split.
    return "\n".join(s for s in out if s.strip()), title

# Index of target Google Docs: (slug, lang) -> document URL, read from a
# tab-separated file whose first three columns are slug, lang, URL.
tsv={}
with open("_translations_urls.tsv",encoding="utf-8") as fh:
    for l in fh:
        p=l.rstrip("\n").split("\t")
        if len(p)>=3: tsv[(p[0],p[1])]=p[2]

# Stored OAuth token -> refreshed credentials -> Drive v3 client.
with open(os.path.expanduser("~/.config/cinik-gdoc/token.json"),encoding="utf-8") as fh:
    t=json.load(fh)
cr=Credentials(token=t["token"],refresh_token=t["refresh_token"],token_uri=t["token_uri"],client_id=t["client_id"],client_secret=t["client_secret"],scopes=t["scopes"])
cr.refresh(Request())
drive=gbuild("drive","v3",credentials=cr,cache_discovery=False)

os.makedirs("html_wp",exist_ok=True)
for slug,raw in ART:
    tok,heads_fr,imgs=parse_fr(raw)
    print(f"\n### {slug} token={tok} FR_headings={len(heads_fr)} images={len(imgs)} (nums {[im['num'] for im in imgs]})")
    for lang in ("en","it"):
        if (slug,lang) not in tsv: print(f"  {lang}: pas de doc tsv"); continue
        wp,title=build(slug,lang,heads_fr,imgs)
        # Explicit UTF-8: the content is accented text and must not depend on
        # the platform's default encoding.
        with open(f"html_wp/{slug}-{lang}.html","w",encoding="utf-8") as out:
            out.write(wp)
        m=re.search(r"/document/d/([^/]+)",tsv[(slug,lang)])
        if not m:
            # The local HTML file is kept; only the upload is skipped.
            print(f"  {lang}: URL de doc invalide: {tsv[(slug,lang)]}")
            continue
        fid=m.group(1)
        # Replace the document content with the generated HTML source.
        drive.files().update(fileId=fid,media_body=MediaInMemoryUpload(wp.encode("utf-8"),mimetype="text/plain",resumable=False),fields="id",supportsAllDrives=True).execute()
        # NOTE(security): this grants write access to anyone; confirm that is
        # intended. Sharing stays best-effort, but a failure is now reported.
        try: drive.permissions().create(fileId=fid,body={"type":"anyone","role":"writer"},fields="id").execute()
        except Exception as e: print(f"  {lang}: partage non appliqué ({e})")
        print(f"  {lang}: {wp.count('<img')} imgs, {len(wp)} chars -> {fid}")