#!/usr/bin/env python3
"""Skill Dependency Manifest Generator.

Scans a skill directory and generates a dependency manifest JSON
(dependency-manifest.json) detecting Python imports, file references,
CLI tool dependencies, and skill dependencies.

Usage:
    dep_manifest.py SKILL_DIR             # Write dependency-manifest.json
    dep_manifest.py SKILL_DIR --tree      # CLI pretty-print instead
    dep_manifest.py SKILL_DIR --stdout    # Print JSON to stdout
"""
import argparse
import json
import re
import sys
from pathlib import Path
from typing import Any

# ── Constants ──────────────────────────────────────────────────────────────

# Shared constants for output schema (matches SPEC.md contract)
CATEGORIES = ["imports", "fileRefs", "binDeps", "skillDeps"]

# Top-level module names shipped with CPython; any import whose top-level
# package is not in this set is reported as "third-party".
PYTHON_STDLIB = {
    "abc", "aifc", "argparse", "array", "ast", "asynchat", "asyncio",
    "asyncore", "atexit", "audioop", "base64", "bdb", "binascii", "binhex",
    "bisect", "builtins", "bz2", "calendar", "cgi", "cgitb", "chunk",
    "cmath", "cmd", "code", "codecs", "codeop", "collections", "colorsys",
    "compileall", "concurrent", "configparser", "contextlib", "contextvars",
    "copy", "copyreg", "cProfile", "crypt", "csv", "ctypes", "curses",
    "dataclasses", "datetime", "dbm", "decimal", "difflib", "dis",
    "distutils", "doctest", "email", "encodings", "enum", "errno",
    "faulthandler", "fcntl", "filecmp", "fileinput", "fnmatch", "fractions",
    "ftplib", "functools", "gc", "getopt", "getpass", "gettext", "glob",
    "graphlib", "grp", "gzip", "hashlib", "heapq", "hmac", "html", "http",
    "idlelib", "imaplib", "imghdr", "imp", "importlib", "inspect", "io",
    "ipaddress", "itertools", "json", "keyword", "lib2to3", "linecache",
    "locale", "logging", "lzma", "mailbox", "mailcap", "marshal", "math",
    "mimetypes", "mmap", "modulefinder", "multiprocessing", "netrc", "nis",
    "nntplib", "numbers", "operator", "optparse", "os", "ossaudiodev",
    "pathlib", "pdb", "pickle", "pickletools", "pipes", "pkgutil",
    "platform", "plistlib", "poplib", "posix", "posixpath", "pprint",
    "profile", "pstats", "pty", "pwd", "py_compile", "pyclbr", "pydoc",
    "queue", "quopri", "random", "re", "readline", "reprlib", "resource",
    "rlcompleter", "runpy", "sched", "secrets", "select", "selectors",
    "shelve", "shlex", "shutil", "signal", "site", "smtpd", "smtplib",
    "sndhdr", "socket", "socketserver", "sqlite3", "ssl", "stat",
    "statistics", "string", "stringprep", "struct", "subprocess", "sunau",
    "symtable", "sys", "sysconfig", "syslog", "tabnanny", "tarfile",
    "telnetlib", "tempfile", "termios", "test", "textwrap", "threading",
    "time", "timeit", "tkinter", "token", "tokenize", "tomllib", "trace",
    "traceback", "tracemalloc", "tty", "turtle", "turtledemo", "types",
    "typing", "unicodedata", "unittest", "urllib", "uu", "uuid", "venv",
    "warnings", "wave", "weakref", "webbrowser", "winreg", "winsound",
    "wsgiref", "xdrlib", "xml", "xmlrpc", "zipapp", "zipfile", "zipimport",
    "zlib",
}

# ── Detectors ──────────────────────────────────────────────────────────────

# Shared patterns matching "import foo[.bar]" and "from foo[.bar] import ...".
# Compiled once at module level instead of being duplicated in each detector.
_IMPORT_PATTERNS = [
    re.compile(r"^\s*import\s+([a-zA-Z_][a-zA-Z0-9_.]*)"),
    re.compile(r"^\s*from\s+([a-zA-Z_][a-zA-Z0-9_.]*)\s+import"),
]


def _import_entry(mod_name: str, line: Any, source_file: str) -> dict[str, Any]:
    """Build one manifest entry for a detected Python import.

    `line` is either an int (file line number) or a "code-block:N" string.
    """
    top_mod = mod_name.split(".", 1)[0]
    in_stdlib = top_mod in PYTHON_STDLIB
    return {
        "name": mod_name,
        "line": line,
        "file": source_file,
        "type": "stdlib" if in_stdlib else "third-party",
        "status": "ok" if in_stdlib else "warn",
        "suggestion": "" if in_stdlib else f"Third-party module '{top_mod}' must be installed",
    }


def detect_python_imports(file_path: Path) -> list[dict[str, Any]]:
    """Scan a .py file for import statements.

    Returns one entry per matching line; unreadable files yield an empty
    list rather than raising.
    """
    results: list[dict[str, Any]] = []
    try:
        content = file_path.read_text(encoding="utf-8", errors="replace")
    except OSError:
        return results
    # Report the path relative to the file's grandparent directory, e.g.
    # "scripts/foo.py" for <skill>/scripts/foo.py.  NOTE(review): for more
    # deeply nested files this is relative to the file's own grandparent,
    # not the skill root — confirm intended behavior.
    rel_file = str(file_path.relative_to(file_path.parent.parent))
    for line_num, line in enumerate(content.splitlines(), 1):
        for pattern in _IMPORT_PATTERNS:
            m = pattern.match(line)
            if m:
                results.append(_import_entry(m.group(1), line_num, rel_file))
    return results


# Fenced code blocks (```python / ```py / ```bash / ```sh / ```shell / bare ```).
_CODE_BLOCK_PATTERN = re.compile(r"```(?:python|py|bash|sh|shell)?\n(.*?)```", re.DOTALL)


def detect_imports_from_code_blocks(content: str, source_file: str) -> list[dict[str, Any]]:
    """Scan fenced code blocks in markdown for Python imports.

    All fenced blocks are scanned, including shell blocks; non-Python lines
    simply never match the import patterns.
    """
    results: list[dict[str, Any]] = []
    for block_match in _CODE_BLOCK_PATTERN.finditer(content):
        # Line numbers are relative to the start of the code block.
        for offset, line in enumerate(block_match.group(1).splitlines(), 1):
            for pattern in _IMPORT_PATTERNS:
                m = pattern.match(line)
                if m:
                    results.append(_import_entry(m.group(1), f"code-block:{offset}", source_file))
    return results


# Ways a scripts/, references/ or assets/ path can appear in SKILL.md.
_FILE_REF_PATTERNS = [
    # Markdown links like [text](scripts/foo.py)
    re.compile(r"\[.*?\]\(((?:scripts/|references/|assets/)\S+)\)"),
    # Backtick paths like `scripts/foo.py`
    re.compile(r"`((?:scripts/|references/|assets/)\S+)`"),
    # Bare parenthesised mentions like (scripts/foo.py)
    re.compile(r"\(((?:scripts/|references/|assets/)\S+)\)"),
]


def detect_file_references(skill_md_content: str, source_file: str) -> list[dict[str, Any]]:
    """Scan SKILL.md for references to scripts/, references/ or assets/ paths.

    Entries default to status "ok"; existence is re-evaluated afterwards by
    check_file_refs_exist().
    """
    results: list[dict[str, Any]] = []
    seen: set[tuple[str, int]] = set()
    for line_num, line in enumerate(skill_md_content.splitlines(), 1):
        for pattern in _FILE_REF_PATTERNS:
            for m in pattern.finditer(line):
                ref_path = m.group(1).rstrip(".)`")
                # The markdown-link and bare-paren patterns both match
                # "[x](scripts/foo)"; keep one entry per (path, line).
                if (ref_path, line_num) in seen:
                    continue
                seen.add((ref_path, line_num))
                results.append({
                    "name": ref_path,
                    "line": line_num,
                    "file": source_file,
                    "status": "ok",  # re-evaluated after the full scan
                    "suggestion": "",
                })
    return results


KNOWN_CLI_TOOLS = [
    "gh", "jq", "ffmpeg", "ffprobe", "git", "python3", "python", "node",
    "npm", "npx", "docker", "docker-compose", "curl", "wget", "rsync",
    "trash", "sips", "convert", "magick", "identify", "pdftotext",
    "pdftoppm", "pdfunite", "qpdf", "exiftool", "sox", "youtube-dl",
    "yt-dlp", "grep", "sed", "awk", "sort", "uniq", "tee", "xargs", "find",
    "rg", "fzf", "bat", "delta", "tmux", "screen", "ssh", "scp",
    "tailscale", "systemctl", "journalctl", "cargo", "go", "ruby", "perl",
    "make", "cmake", "pip3", "pip", "poetry", "uv", "brew", "apt",
    "apt-get", "yum", "dnf", "pacman", "nix-env", "helm", "kubectl",
    "terraform", "ansible", "pulumi", "vault", "aws", "gcloud", "az",
    "psql", "mysql", "sqlite3", "redis-cli",
]

# Pre-compiled per-tool patterns, hoisted so they are not rebuilt for every
# (line, tool) pair.  The lookarounds reject matches embedded in a longer
# word-like token, so "docker" does not fire inside "docker-compose" and
# "pip" does not fire inside "pip3".
# NOTE(review): the original pattern text was corrupted in this copy of the
# file; this is a reconstruction — confirm against SPEC.md / git history.
_TOOL_PATTERNS = {
    tool: re.compile(rf"(?<![\w-]){re.escape(tool)}(?![\w-])")
    for tool in KNOWN_CLI_TOOLS
}


def detect_bin_deps(skill_md_content: str, source_file: str) -> list[dict[str, Any]]:
    """Scan SKILL.md for mentions of known CLI tools (one entry per tool).

    Each tool is reported once, at the first line where it appears.
    """
    results: list[dict[str, Any]] = []
    seen: set[str] = set()
    for line_num, line in enumerate(skill_md_content.splitlines(), 1):
        for tool, pattern in _TOOL_PATTERNS.items():
            if tool in seen:
                continue
            if pattern.search(line):
                seen.add(tool)
                results.append({
                    "name": tool,
                    "line": line_num,
                    "file": source_file,
                    "status": "warn",
                    "suggestion": f"CLI tool '{tool}' must be available on PATH",
                })
    return results


# Skill references in body text: "see/using/via/with [the] <name> skill" and
# "`<name>` skill".  The article is genuinely optional ("via pdf skill"
# matches); the original's "the?" made at least "th" mandatory.
_SKILL_REF_PATTERNS = [
    re.compile(r"(?:see|using|via|with)\s+(?:the\s+)?([a-z][a-z0-9-]+)\s+skill", re.IGNORECASE),
    re.compile(r"`([a-z][a-z0-9-]+)`\s+skill", re.IGNORECASE),
]


def detect_skill_deps(skill_md_content: str, source_file: str) -> list[dict[str, Any]]:
    """Scan SKILL.md body text for references to other skills."""
    results: list[dict[str, Any]] = []
    for line_num, line in enumerate(skill_md_content.splitlines(), 1):
        for pattern in _SKILL_REF_PATTERNS:
            for m in pattern.finditer(line):
                skill_name = m.group(1).lower().strip()
                # "see the skill" would otherwise capture the article itself.
                if skill_name in ("the", "a", "an"):
                    continue
                results.append({
                    "name": skill_name,
                    "line": line_num,
                    "file": source_file,
                    "status": "warn",
                    "suggestion": f"Referenced skill '{skill_name}' must be installed",
                })
    return results


# ── Path validation ────────────────────────────────────────────────────────


def validate_existing_paths(results: list[dict[str, Any]], skill_dir: Path) -> None:
    """Deprecated no-op kept for backward compatibility.

    File-reference validation is done per category by check_file_refs_exist();
    this function intentionally does nothing.
    """


def check_file_refs_exist(entries: list[dict[str, Any]], skill_dir: Path) -> None:
    """Mark each file-reference entry "ok" or "error" by checking the disk.

    Mutates `entries` in place.  is_symlink() is checked as well because
    exists() reports a broken symlink as missing, yet the reference target
    is present in the skill directory.
    """
    for entry in entries:
        ref_path = entry["name"]
        full_path = skill_dir / ref_path
        if full_path.exists() or full_path.is_symlink():
            entry["status"] = "ok"
            entry["suggestion"] = ""
        else:
            entry["status"] = "error"
            entry["suggestion"] = f"Referenced path '{ref_path}' does not exist in skill directory"
# ── Main scan ──────────────────────────────────────────────────────────────


def _dedupe(entries: list[dict[str, Any]], key_fields: tuple[str, ...]) -> list[dict[str, Any]]:
    """Return `entries` with duplicates removed, keeping the first occurrence.

    Two entries are duplicates when they agree on every field in `key_fields`.
    """
    seen: set[tuple[Any, ...]] = set()
    deduped: list[dict[str, Any]] = []
    for entry in entries:
        key = tuple(entry[field] for field in key_fields)
        if key not in seen:
            seen.add(key)
            deduped.append(entry)
    return deduped


def scan_skill(skill_dir: Path) -> dict[str, Any]:
    """Scan a skill directory and return the dependency manifest dict.

    Collects Python imports from all *.py files (recursively) and from
    SKILL.md code blocks, plus file references, CLI tool mentions and skill
    references from SKILL.md.  A missing or unreadable SKILL.md is treated
    as empty rather than an error.
    """
    imports: list[dict[str, Any]] = []
    file_refs: list[dict[str, Any]] = []
    bin_deps: list[dict[str, Any]] = []
    skill_deps: list[dict[str, Any]] = []

    # All .py files in the skill directory and its subdirectories.
    for py_file in skill_dir.rglob("*.py"):
        imports.extend(detect_python_imports(py_file))

    content = ""
    skill_md = skill_dir / "SKILL.md"
    if skill_md.exists():
        try:
            content = skill_md.read_text(encoding="utf-8", errors="replace")
        except OSError:
            content = ""

    if content:
        imports.extend(detect_imports_from_code_blocks(content, "SKILL.md"))
        file_refs = detect_file_references(content, "SKILL.md")
        bin_deps = detect_bin_deps(content, "SKILL.md")
        skill_deps = detect_skill_deps(content, "SKILL.md")

    # Re-evaluate the default "ok" status against what is actually on disk.
    if file_refs:
        check_file_refs_exist(file_refs, skill_dir)

    return {
        "manifestVersion": "1.0",
        "skillDir": str(skill_dir.resolve()),
        "imports": _dedupe(imports, ("name", "file")),
        "fileRefs": file_refs,
        "binDeps": _dedupe(bin_deps, ("name",)),
        "skillDeps": _dedupe(skill_deps, ("name",)),
    }


# ── Pretty print ───────────────────────────────────────────────────────────

# Human-readable labels and status icons, hoisted so they are not rebuilt
# for every category.
_CATEGORY_LABELS = {
    "imports": "Python Imports",
    "fileRefs": "File References",
    "binDeps": "CLI Tools",
    "skillDeps": "Skill Dependencies",
}
_STATUS_ICONS = {"ok": "✓", "warn": "⚠", "error": "✗"}


def print_tree(manifest: dict[str, Any]) -> None:
    """Pretty-print the manifest as a CLI tree (empty categories skipped)."""
    print(f"\n{'=' * 60}")
    print(f" Dependency Manifest: {manifest['skillDir']}")
    print(f" Version: {manifest['manifestVersion']}")
    print(f"{'=' * 60}")
    for category in CATEGORIES:
        entries = manifest.get(category, [])
        if not entries:
            continue
        label = _CATEGORY_LABELS.get(category, category)
        print(f"\n 📦 {label} ({len(entries)})")
        print(f" {'─' * 58}")
        for entry in entries:
            status_icon = _STATUS_ICONS.get(entry.get("status", ""), "?")
            print(f"   {status_icon} {entry['name']} (line {entry.get('line', '?')})")
            suggestion = entry.get("suggestion", "")
            if suggestion:
                print(f"       → {suggestion}")


# ── Entry point ────────────────────────────────────────────────────────────


def main() -> None:
    """CLI entry point: parse arguments, scan, and emit the manifest.

    Exits with status 1 when the given path is not a directory.
    """
    parser = argparse.ArgumentParser(
        description="Generate dependency manifest for a skill directory",
    )
    parser.add_argument("skill_dir", help="Path to skill directory")
    parser.add_argument(
        "--tree",
        action="store_true",
        help="Print dependency tree to stdout instead of writing manifest.json",
    )
    parser.add_argument(
        "--stdout",
        action="store_true",
        help="Print JSON to stdout instead of writing manifest.json",
    )
    args = parser.parse_args()

    skill_dir = Path(args.skill_dir).resolve()
    if not skill_dir.is_dir():
        print(f"[ERROR] Not a directory: {skill_dir}", file=sys.stderr)
        sys.exit(1)

    manifest = scan_skill(skill_dir)

    if args.tree:
        print_tree(manifest)
    elif args.stdout:
        print(json.dumps(manifest, indent=2))
    else:
        output_path = skill_dir / "dependency-manifest.json"
        # Explicit UTF-8 so the manifest bytes do not depend on the
        # platform's default locale encoding.
        output_path.write_text(json.dumps(manifest, indent=2), encoding="utf-8")
        print(f"[OK] Wrote dependency manifest to {output_path}")
        # Per-category summary.
        total = sum(len(manifest.get(c, [])) for c in CATEGORIES)
        print(f" Found {total} total dependency entries:")
        for cat in CATEGORIES:
            print(f"   {cat}: {len(manifest.get(cat, []))}")


if __name__ == "__main__":
    main()