#!/usr/bin/env python3
"""
build_deck.py - turn a cards.json into an Anki .apkg deck.

This is the public reference script shipped with the ai-flashcards
course (https://clawvard.school/courses/ai-flashcards). The hard work
of writing good Q/A cards from a PDF / Markdown source is done by the
user's agent IDE (Claude Code / Cursor / Codex CLI) reading the source
material in-session and writing cards.json by hand following the SOP.
This script's only job is to take that cards.json and emit an Anki
deck that imports cleanly into Anki 2.1.x Desktop, AnkiDroid, AnkiWeb.

cards.json schema (one JSON array; each item):
  {
    "front":   "the question side (str, required)",
    "back":    "the answer side (str, required)",
    "tags":    ["topic-slug", ...]            (str list, optional),
    "subdeck": "Deck Name::Chapter Name"      (str, optional),
    "source":  "where the card came from"     (str, optional; shown under the answer),
    "cloze":   true / false                   (bool, optional)
  }

When `cloze` is true, `front` must contain at least one `{{c1::...}}`
Anki cloze marker; `back` is rendered as the "Extra" field shown
under the cloze answer.

Standalone, MIT, no third-party API calls. Reads only the local
genanki package + the user's cards.json. The generated .apkg is
the only output (plus a short stdout summary).
"""

from __future__ import annotations

import argparse
import hashlib
import json
import re
import sys
from collections import Counter
from pathlib import Path

try:
    import genanki
except ImportError:
    sys.exit(
        "genanki not installed. Run:\n"
        "  python3 -m venv .afvenv && source .afvenv/bin/activate && pip install genanki"
    )

# ── stable model / deck IDs ──────────────────────────────────────────
# genanki recommends a 31-bit int derived from a stable string so the
# same deck name keeps merging on re-import instead of creating a new
# top-level deck each run. We hash the course slug + a kind suffix.
def _stable_id(seed: str) -> int:
    """Map *seed* to a deterministic integer usable as a genanki model/deck id.

    The first four bytes of the SHA-256 digest of the UTF-8 encoded seed are
    reduced to 30 bits and offset by one billion, so the result always lies
    in ``[1_000_000_000, 1_000_000_000 + 2**30 - 1]`` and therefore stays
    below ``2**31``.
    """
    digest = hashlib.sha256(seed.encode("utf-8")).digest()
    head = int.from_bytes(digest[:4], "big")
    # Keeping only the low 30 bits leaves headroom for the offset below.
    low_30_bits = head % (1 << 30)
    return 1_000_000_000 + low_30_bits


# Note type for ordinary question/answer cards. Three fields: Front, Back
# and Source; the answer template only emits the Source block when that
# field is non-empty (the {{#Source}}...{{/Source}} conditional).
BASIC_MODEL = genanki.Model(
    model_id=_stable_id("clawvard.ai-flashcards.basic.v1"),
    name="Clawvard AI-Flashcards Basic",
    fields=[{"name": field_name} for field_name in ("Front", "Back", "Source")],
    templates=[
        dict(
            name="Card 1",
            qfmt="<div class='front'>{{Front}}</div>",
            afmt="".join(
                (
                    "<div class='front'>{{Front}}</div>",
                    "<hr id='answer'>",
                    "<div class='back'>{{Back}}</div>",
                    "{{#Source}}<div class='source'>{{Source}}</div>{{/Source}}",
                )
            ),
        )
    ],
    # One entry per CSS rule; joined without separators into a single sheet.
    css="".join(
        (
            # card body
            ".card{font-family:-apple-system,BlinkMacSystemFont,'Segoe UI',Roboto,"
            "Helvetica,Arial,sans-serif;font-size:18px;color:#1f2330;background:#fafaf7;"
            "padding:24px;line-height:1.55}",
            # question / answer text
            ".front{font-weight:600}",
            ".back{margin-top:10px}",
            # small monospace source line
            ".source{margin-top:14px;font-size:12px;color:#878b9a;font-family:ui-monospace,"
            "'SF Mono','JetBrains Mono',Menlo,Consolas,monospace}",
            # divider between question and answer
            "hr#answer{border:0;border-top:1px dashed #d8cfba;margin:14px 0}",
        )
    ),
)


# Note type for cloze-deletion cards. Fields are Text (carrying the
# {{c1::...}} markers), Extra and Source; Extra and Source are each
# rendered on the answer side only when non-empty. Styling is shared
# with BASIC_MODEL.
CLOZE_MODEL = genanki.Model(
    model_id=_stable_id("clawvard.ai-flashcards.cloze.v1"),
    name="Clawvard AI-Flashcards Cloze",
    model_type=genanki.Model.CLOZE,
    fields=[{"name": field_name} for field_name in ("Text", "Extra", "Source")],
    templates=[
        dict(
            name="Cloze",
            qfmt="<div class='front'>{{cloze:Text}}</div>",
            afmt="".join(
                (
                    "<div class='front'>{{cloze:Text}}</div>",
                    "{{#Extra}}<hr id='answer'><div class='back'>{{Extra}}</div>{{/Extra}}",
                    "{{#Source}}<div class='source'>{{Source}}</div>{{/Source}}",
                )
            ),
        )
    ],
    css=BASIC_MODEL.css,
)


# Any run of characters other than ASCII letters and digits.
_SLUG_RE = re.compile(r"[^A-Za-z0-9]+")


def slugify(s: str) -> str:
    """Return a lowercase, hyphen-separated ASCII slug for *s*.

    Runs of characters other than ASCII letters/digits collapse to a single
    ``-``; leading and trailing hyphens are removed. When nothing usable is
    left, the fallback ``"deck"`` is returned.
    """
    hyphenated = _SLUG_RE.sub("-", s.strip())
    slug = hyphenated.strip("-").lower()
    if slug:
        return slug
    return "deck"


def parse_args() -> argparse.Namespace:
    """Parse the command line into ``cards``, ``deck_name`` and ``out``.

    All three options are mandatory; ``--cards`` and ``--out`` are converted
    to ``Path`` objects, ``--deck-name`` stays a plain string.
    """
    parser = argparse.ArgumentParser(
        description="Build an Anki .apkg deck from a JSON list of cards.",
    )
    # (flag, extra keyword arguments) for each required option, in the order
    # they should appear in --help.
    option_table = (
        (
            "--cards",
            {
                "type": Path,
                "help": "Path to cards.json (a JSON array of card objects).",
            },
        ),
        (
            "--deck-name",
            {"help": "Top-level Anki deck name (e.g. 'Biology 101')."},
        ),
        (
            "--out",
            {
                "type": Path,
                "help": "Output .apkg path (e.g. ./out/flashcards.apkg).",
            },
        ),
    )
    for flag, extra in option_table:
        parser.add_argument(flag, required=True, **extra)
    return parser.parse_args()


def load_cards(path: Path) -> list[dict]:
    """Read *path* and return its top-level JSON array.

    Args:
        path: Location of cards.json.

    Returns:
        The decoded JSON array. Its items are not validated here.

    Raises:
        SystemExit: with a one-line message when the file is missing,
            cannot be read or decoded as UTF-8, is not valid JSON, or does
            not hold an array at the top level.
    """
    if not path.exists():
        sys.exit(f"cards.json not found: {path}")
    try:
        # "utf-8-sig" reads plain UTF-8 unchanged but also drops a leading
        # byte-order mark, which json.loads would otherwise reject.
        text = path.read_text(encoding="utf-8-sig")
    except (OSError, UnicodeDecodeError) as e:
        # e.g. the path is a directory, is unreadable, or is not UTF-8.
        sys.exit(f"cards.json could not be read: {e}")
    try:
        data = json.loads(text)
    except json.JSONDecodeError as e:
        sys.exit(f"cards.json is not valid JSON: {e}")
    if not isinstance(data, list):
        sys.exit("cards.json must be a JSON array at the top level.")
    return data


# A complete cloze deletion such as {{c1::answer}} or {{c2::answer::hint}}.
# Index 0 is excluded: cloze numbering starts at 1.
_CLOZE_MARKER_RE = re.compile(r"\{\{c[1-9]\d*::.+?\}\}", re.DOTALL)


def validate_card(card: dict, idx: int) -> None:
    """Exit with a readable message if *card* does not match the schema.

    Args:
        card: One item of the cards.json array.
        idx: Position of the item in the array; used only in messages.

    Raises:
        SystemExit: when the item is not an object, ``front`` is missing,
            not a string or blank, ``back`` is missing or not a string, a
            cloze card has no complete ``{{cN::...}}`` marker in ``front``,
            ``tags`` is not a list of strings, or ``subdeck`` / ``source``
            is set to something other than a string.
    """
    if not isinstance(card, dict):
        sys.exit(f"cards[{idx}] is not an object")
    if "front" not in card or not isinstance(card["front"], str) or not card["front"].strip():
        sys.exit(f"cards[{idx}].front is missing or empty")
    if "back" not in card or not isinstance(card["back"], str):
        # cloze cards may carry empty back, but require the key
        sys.exit(f"cards[{idx}].back is missing")
    if card.get("cloze") and not _CLOZE_MARKER_RE.search(card["front"]):
        # Braces are doubled twice so the message shows the literal
        # {{c1::...}} syntax the user has to type.
        sys.exit(
            f"cards[{idx}] is marked cloze but front has no {{{{c1::...}}}} marker"
        )
    tags = card.get("tags", [])
    if not isinstance(tags, list) or any(not isinstance(t, str) for t in tags):
        sys.exit(f"cards[{idx}].tags must be a list of strings")
    for key in ("subdeck", "source"):
        value = card.get(key)
        # Falsy values (absent, null, "") are treated as "not set" by
        # build_package; anything else is stripped as a string there, so a
        # non-string would crash with a traceback instead of this message.
        if value and not isinstance(value, str):
            sys.exit(f"cards[{idx}].{key} must be a string")


# Separator runs inside a tag: any ASCII character that is not a letter or
# digit, plus any Unicode whitespace. Other non-ASCII characters are kept,
# so tags written in non-Latin scripts survive cleaning.
_TAG_SEP_RE = re.compile(r"(?:[^A-Za-z0-9\u0080-\U0010FFFF]|\s)+")


def _tag_clean(t: str) -> str:
    """Return *t* normalised into a single whitespace-free tag.

    Each separator run becomes one ``-`` and leading/trailing hyphens are
    dropped. For pure-ASCII input this is the same result as replacing every
    run of non-alphanumerics; non-ASCII letters are preserved rather than
    being stripped. The result may be empty, in which case the caller is
    expected to discard the tag.
    """
    # Anki tags must not contain whitespace
    return _TAG_SEP_RE.sub("-", t.strip()).strip("-")


def build_package(cards: list[dict], deck_name: str) -> tuple[genanki.Package, dict]:
    """Validate *cards* and assemble them into a genanki package.

    Args:
        cards: Card objects as loaded from cards.json. Each one is passed
            to ``validate_card``, which exits the process on the first
            malformed entry.
        deck_name: Name of the root deck. A card's ``subdeck`` is used
            as-is when it equals this name or starts with
            ``"<deck_name>::"``; any other value is nested underneath the
            root deck.

    Returns:
        ``(package, summary)``. ``summary`` has the keys ``total``,
        ``basic``, ``cloze``, ``subdecks`` (sorted deck names, root
        included), ``tags`` (cleaned tag -> number of notes, most common
        first) and ``duplicate_fronts`` (``(index, front)`` for each card
        whose front repeats an earlier one, ignoring case and differences
        in whitespace). Duplicates are reported only; the notes are still
        added to their decks.
    """
    child_prefix = f"{deck_name}::"
    decks: dict[str, genanki.Deck] = {
        deck_name: genanki.Deck(_stable_id(f"deck::{deck_name}"), deck_name)
    }

    fronts_seen: set[str] = set()
    duplicates: list[tuple[int, str]] = []
    cloze_count = basic_count = 0
    tag_counter: Counter[str] = Counter()

    for i, card in enumerate(cards):
        validate_card(card, i)

        front = card["front"].strip()
        back = card["back"].strip()
        source = (card.get("source") or "").strip()

        subdeck = (card.get("subdeck") or "").strip() or deck_name
        # Compare against "<root>::" rather than the bare root name: with a
        # root of "Bio", a subdeck "Biology::Ch1" must still be nested under
        # "Bio" instead of becoming a separate top-level deck.
        if subdeck != deck_name and not subdeck.startswith(child_prefix):
            subdeck = f"{child_prefix}{subdeck}"

        # Clean each tag once, drop the ones that end up empty, and collapse
        # repeats (order preserved) so a note is counted once per tag.
        cleaned = (_tag_clean(t) for t in card.get("tags", []))
        tags = list(dict.fromkeys(t for t in cleaned if t))
        tag_counter.update(tags)

        if subdeck not in decks:
            decks[subdeck] = genanki.Deck(_stable_id(f"deck::{subdeck}"), subdeck)

        # Both models take their fields in the same order: question text,
        # answer/extra text, source.
        if card.get("cloze"):
            model = CLOZE_MODEL
            cloze_count += 1
        else:
            model = BASIC_MODEL
            basic_count += 1
        decks[subdeck].add_note(
            genanki.Note(model=model, fields=[front, back, source], tags=tags)
        )

        # dedup check on the visible front string
        key = re.sub(r"\s+", " ", front).lower()
        if key in fronts_seen:
            duplicates.append((i, front))
        else:
            fronts_seen.add(key)

    package = genanki.Package(list(decks.values()))
    summary = {
        "total": len(cards),
        "basic": basic_count,
        "cloze": cloze_count,
        "subdecks": sorted(decks),
        "tags": dict(tag_counter.most_common()),
        "duplicate_fronts": duplicates,
    }
    return package, summary


def main() -> int:
    """Run the CLI: load cards, build the deck, write it, print a summary.

    Returns:
        ``0`` on success. Every failure path ends the process through
        ``sys.exit`` with a message instead of returning.
    """
    args = parse_args()
    cards = load_cards(args.cards)
    if not cards:
        sys.exit("cards.json is empty")

    # Build (and thereby validate) before touching the filesystem, so a bad
    # cards.json does not leave a freshly created, empty output directory.
    package, summary = build_package(cards, args.deck_name)
    args.out.parent.mkdir(parents=True, exist_ok=True)
    package.write_to_file(str(args.out))

    print(f"\n  wrote {args.out}  ({summary['total']} cards)")
    print(
        f"  · basic: {summary['basic']}  cloze: {summary['cloze']}"
        f"  · subdecks: {len(summary['subdecks'])}"
        f"  · tags: {len(summary['tags'])}"
    )
    for d in summary["subdecks"]:
        print(f"      - {d}")
    if summary["tags"]:
        # summary["tags"] is already ordered most-common-first.
        top = ", ".join(f"{t}({n})" for t, n in list(summary["tags"].items())[:8])
        print(f"  · top tags: {top}")

    duplicates = summary["duplicate_fronts"]
    if duplicates:
        print(f"  ! {len(duplicates)} duplicate front(s) — review:")
        shown = duplicates[:5]
        for idx, front in shown:
            print(f"      cards[{idx}]: {front[:80]}")
        # Make the truncation visible rather than implying the list is complete.
        hidden = len(duplicates) - len(shown)
        if hidden:
            print(f"      ... and {hidden} more")

    print("\n  Drag the .apkg into Anki Desktop, or upload to AnkiWeb, to start review.")
    return 0


if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    sys.exit(main())
