#!/bin/bash
# Runner v2 robuste : séquentiel, workers=2 (pic mémoire bas), health-check +
# restart serveur AVANT chaque shard (le cgroup partagé peut OOM-kill le serveur).
# Continue malgré les échecs. Résultats agrégés dans results/full.
# Absolute locations used by the runner:
#   WORKTREE - project checkout from which the server is started (npm start)
#   SITE     - site-check scratchpad holding shards/, results/ and screenshots/
#   LOGDIR   - per-shard logs and the server log, kept under $SITE/results
WORKTREE="/opt/projects/.covalba-next-wp-worktrees/tom-check-du-site"
SITE="/tmp/claude-1001/-opt-projects--covalba-next-wp-worktrees-tom-check-du-site/2e5bec39-75a9-4f87-8bff-902ecedde81a/scratchpad/site-check"
LOGDIR="${SITE}/results/shard-logs"

# The rest of the script uses paths relative to $SITE; give up if it is
# not reachable.
if ! cd "$SITE"; then
  exit 1
fi

# Best-effort: source the profile script; stderr is discarded, so a missing
# file is not reported and does not stop the run.
source /etc/profile.d/ai-gateway.sh 2>/dev/null

# Viewport-only: limits the memory peak (the shared cgroup is tight).
FULLPAGE=0
export FULLPAGE

# Print the current memory usage of the ttyd.service cgroup, converted from
# bytes to GiB with one decimal and a trailing "G" (no newline).
# Prints nothing when the cgroup file cannot be read (awk's error message is
# discarded); the return status is awk's.
mem() {
  local usage_file=/sys/fs/cgroup/system.slice/ttyd.service/memory.current
  awk '{ printf "%.1fG", $1 / 1073741824 }' "$usage_file" 2>/dev/null
}

#######################################
# Make sure the site server on localhost:3101 answers HTTP 200; when it does
# not, kill the process listening on that port (if any) and start a new one
# with `npm start` from $WORKTREE.
# Globals:
#   WORKTREE (read) - directory in which `npm start` is launched
#   LOGDIR   (read) - directory receiving server-runner.log
# Outputs:
#   Progress lines on stdout, only when a restart is attempted.
# Returns:
#   0 if the server answers 200 (immediately or after the restart),
#   non-zero if it still does not answer 200 after the restart.
#######################################
ensure_server() {
  local code pid
  code=$(curl -s -o /dev/null -w "%{http_code}" --max-time 8 http://localhost:3101/ 2>/dev/null)
  if [ "$code" = "200" ]; then return 0; fi
  echo "  [server] HS (code=$code) → restart"
  # Require whitespace right after the port so that only a listener on
  # exactly :3101 is selected (a bare ":3101" would also match e.g. :31010).
  pid=$(ss -ltnp 2>/dev/null | grep -E ":3101[[:space:]]" | grep -oE "pid=[0-9]+" | head -1 | cut -d= -f2)
  if [ -n "$pid" ]; then
    # Short pause after a successful kill before starting the replacement.
    kill "$pid" 2>/dev/null && sleep 2
  fi
  # Subshell + setsid + nohup: the server is detached from this script and
  # the cd does not affect the caller's working directory.
  ( cd "$WORKTREE" && PORT=3101 HOST=0.0.0.0 setsid nohup npm start > "$LOGDIR/server-runner.log" 2>&1 & )
  # Block until the server accepts requests (or curl exhausts its retries).
  curl -s -o /dev/null --retry 40 --retry-delay 2 --retry-all-errors --retry-connrefused --max-time 120 http://localhost:3101/ 2>/dev/null
  code=$(curl -s -o /dev/null -w "%{http_code}" --max-time 8 http://localhost:3101/ 2>/dev/null)
  echo "  [server] après restart: code=$code"
  # Report the outcome of the restart instead of the status of the echo.
  [ "$code" = "200" ]
}

# --- Shard run ---------------------------------------------------------------
# Runs every shards/shard-*.json sequentially through Playwright (desktop and
# mobile projects), checking the server before each shard. A shard that
# produced fewer than 3 usable results is retried once. Shard failures do not
# stop the run; results accumulate in results/full.

# Print the number of entries in results/full.
# Body runs in a subshell so nullglob does not leak into the rest of the script.
count_results() (
  shopt -s nullglob
  entries=(results/full/*)
  echo "${#entries[@]}"
)

# Run Playwright for one shard.
# Arguments: $1 - shard path relative to $SITE
#            $2 - shard name (used for log / JSON report file names)
#            $3 - file-name suffix ("" for the first attempt, "-retry" for the retry)
# Returns:   the exit status of `npx playwright test`
run_shard() {
  local shard=$1 name=$2 suffix=$3
  SHARD_FILE="$SITE/$shard" RESULTS_DIR="$SITE/results/full" SCREENSHOT_DIR="$SITE/screenshots/full" \
  PW_JSON="$LOGDIR/${name}-pw${suffix}.json" PW_WORKERS=2 PASSMARK_LOG_LEVEL=warn \
    npx playwright test --project=desktop --project=mobile > "$LOGDIR/${name}${suffix}.log" 2>&1
}

# Print how many result files of the given shard (one per entry and per
# viewport, desktop/mobile) exist in results/full with 200 <= httpStatus < 400.
# Missing or unparsable result files are skipped. Prints nothing if node fails.
# Arguments: $1 - shard path relative to $SITE
count_ok() {
  node -e '
    const fs = require("fs"), p = require("path");
    const dir = process.argv[1];
    const shard = JSON.parse(fs.readFileSync(process.argv[2]));
    const san = s => s.replace(/[^a-z0-9]+/gi, "_").replace(/^_|_$/g, "") || "root";
    let ok = 0;
    for (const e of shard) {
      for (const v of ["desktop", "mobile"]) {
        const f = p.join(dir, `${san(e.path)}__${v}.json`);
        try {
          const r = JSON.parse(fs.readFileSync(f));
          if (r.httpStatus >= 200 && r.httpStatus < 400) ok++;
        } catch {}
      }
    }
    console.log(ok);
  ' "$SITE/results/full" "$SITE/$1" 2>/dev/null
}

# Resolve the shard list up front: an unmatched glob stays literal, which
# would otherwise run Playwright against a non-existent shard file. Checked
# before the previous results are wiped.
shards=(shards/shard-*.json)
if [ ! -e "${shards[0]}" ]; then
  echo "no shard file matches $SITE/shards/shard-*.json" >&2
  exit 1
fi

# Start from a clean slate; shard logs are kept.
rm -rf results/full screenshots/full
mkdir -p results/full screenshots/full "$LOGDIR"

echo "RUN_START $(date +%T) mem=$(mem)"
for s in "${shards[@]}"; do
  name=${s##*/}
  name=${name%.json}
  ensure_server
  echo "=== START $name $(date +%T) mem=$(mem) ==="
  run_shard "$s" "$name" ""
  ec=$?
  # Sanity check: how many usable (2xx/3xx) results does THIS shard have?
  ok=$(count_ok "$s")
  echo "=== END $name exit=$ec ok200=$ok mem=$(mem) total=$(count_results) $(date +%T) ==="
  # If the shard came back (almost) empty, the server probably died during the
  # run: check/restart the server and retry once.
  if [ "${ok:-0}" -lt 3 ]; then
    echo "  [retry] $name semble vide (ok200=$ok) → restart serveur + retry"
    ensure_server
    run_shard "$s" "$name" "-retry"
    ec=$?
    echo "  [retry] $name done exit=$ec total=$(count_results)"
  fi
done
echo "ALL_SHARDS_DONE $(date +%T) total=$(count_results) mem=$(mem)"
