#!/usr/bin/env python3
"""Skill Dependency Manifest Generator.

Scans a skill directory and generates a dependency manifest JSON
(dependency-manifest.json) detecting Python imports, file references,
CLI tool dependencies, and skill dependencies.

Usage:
    dep_manifest.py SKILL_DIR             # Write dependency-manifest.json
    dep_manifest.py SKILL_DIR --tree      # CLI pretty-print instead
    dep_manifest.py SKILL_DIR --stdout    # Print JSON to stdout
"""
import argparse
import json
import re
import sys
from pathlib import Path
from typing import Any

# ── Constants ──────────────────────────────────────────────────────────────

# Shared constants for output schema (matches SPEC.md contract)
CATEGORIES = ["imports", "fileRefs", "binDeps", "skillDeps"]

# Top-level module names shipped with CPython; any import whose top-level
# package is not in this set is reported as "third-party".
PYTHON_STDLIB = {
    "abc", "aifc", "argparse", "array", "ast", "asynchat", "asyncio",
    "asyncore", "atexit", "audioop", "base64", "bdb", "binascii", "binhex",
    "bisect", "builtins", "bz2", "calendar", "cgi", "cgitb", "chunk",
    "cmath", "cmd", "code", "codecs", "codeop", "collections", "colorsys",
    "compileall", "concurrent", "configparser", "contextlib", "contextvars",
    "copy", "copyreg", "cProfile", "crypt", "csv", "ctypes", "curses",
    "dataclasses", "datetime", "dbm", "decimal", "difflib", "dis",
    "distutils", "doctest", "email", "encodings", "enum", "errno",
    "faulthandler", "fcntl", "filecmp", "fileinput", "fnmatch", "fractions",
    "ftplib", "functools", "gc", "getopt", "getpass", "gettext", "glob",
    "graphlib", "grp", "gzip", "hashlib", "heapq", "hmac", "html", "http",
    "idlelib", "imaplib", "imghdr", "imp", "importlib", "inspect", "io",
    "ipaddress", "itertools", "json", "keyword", "lib2to3", "linecache",
    "locale", "logging", "lzma", "mailbox", "mailcap", "marshal", "math",
    "mimetypes", "mmap", "modulefinder", "multiprocessing", "netrc", "nis",
    "nntplib", "numbers", "operator", "optparse", "os", "ossaudiodev",
    "pathlib", "pdb", "pickle", "pickletools", "pipes", "pkgutil",
    "platform", "plistlib", "poplib", "posix", "posixpath", "pprint",
    "profile", "pstats", "pty", "pwd", "py_compile", "pyclbr", "pydoc",
    "queue", "quopri", "random", "re", "readline", "reprlib", "resource",
    "rlcompleter", "runpy", "sched", "secrets", "select", "selectors",
    "shelve", "shlex", "shutil", "signal", "site", "smtpd", "smtplib",
    "sndhdr", "socket", "socketserver", "sqlite3", "ssl", "stat",
    "statistics", "string", "stringprep", "struct", "subprocess", "sunau",
    "symtable", "sys", "sysconfig", "syslog", "tabnanny", "tarfile",
    "telnetlib", "tempfile", "termios", "test", "textwrap", "threading",
    "time", "timeit", "tkinter", "token", "tokenize", "tomllib", "trace",
    "traceback", "tracemalloc", "tty", "turtle", "turtledemo", "types",
    "typing", "unicodedata", "unittest", "urllib", "uu", "uuid", "venv",
    "warnings", "wave", "weakref", "webbrowser", "winreg", "winsound",
    "wsgiref", "xdrlib", "xml", "xmlrpc", "zipapp", "zipfile", "zipimport",
    "zlib",
}

# ── Detectors ──────────────────────────────────────────────────────────────

# Shared patterns matching "import foo[.bar]" and "from foo[.bar] import ...".
# Compiled once at module level instead of being duplicated in each detector.
_IMPORT_PATTERNS = [
    re.compile(r"^\s*import\s+([a-zA-Z_][a-zA-Z0-9_.]*)"),
    re.compile(r"^\s*from\s+([a-zA-Z_][a-zA-Z0-9_.]*)\s+import"),
]


def _import_entry(mod_name: str, line: Any, source_file: str) -> dict[str, Any]:
    """Build one manifest entry for a detected Python import.

    `line` is either an int (file line number) or a "code-block:N" string.
    """
    top_mod = mod_name.split(".", 1)[0]
    in_stdlib = top_mod in PYTHON_STDLIB
    return {
        "name": mod_name,
        "line": line,
        "file": source_file,
        "type": "stdlib" if in_stdlib else "third-party",
        "status": "ok" if in_stdlib else "warn",
        "suggestion": "" if in_stdlib else f"Third-party module '{top_mod}' must be installed",
    }


def detect_python_imports(file_path: Path) -> list[dict[str, Any]]:
    """Scan a .py file for import statements.

    Returns one entry per matching line; unreadable files yield an empty
    list rather than raising.
    """
    results: list[dict[str, Any]] = []
    try:
        content = file_path.read_text(encoding="utf-8", errors="replace")
    except OSError:
        return results
    # Report the path relative to the file's grandparent directory, e.g.
    # "scripts/foo.py" for <skill>/scripts/foo.py.  NOTE(review): for more
    # deeply nested files this is relative to the file's own grandparent,
    # not the skill root — confirm intended behavior.
    rel_file = str(file_path.relative_to(file_path.parent.parent))
    for line_num, line in enumerate(content.splitlines(), 1):
        for pattern in _IMPORT_PATTERNS:
            m = pattern.match(line)
            if m:
                results.append(_import_entry(m.group(1), line_num, rel_file))
    return results


# Fenced code blocks (```python / ```py / ```bash / ```sh / ```shell / bare ```).
_CODE_BLOCK_PATTERN = re.compile(r"```(?:python|py|bash|sh|shell)?\n(.*?)```", re.DOTALL)


def detect_imports_from_code_blocks(content: str, source_file: str) -> list[dict[str, Any]]:
    """Scan fenced code blocks in markdown for Python imports.

    All fenced blocks are scanned, including shell blocks; non-Python lines
    simply never match the import patterns.
    """
    results: list[dict[str, Any]] = []
    for block_match in _CODE_BLOCK_PATTERN.finditer(content):
        # Line numbers are relative to the start of the code block.
        for offset, line in enumerate(block_match.group(1).splitlines(), 1):
            for pattern in _IMPORT_PATTERNS:
                m = pattern.match(line)
                if m:
                    results.append(_import_entry(m.group(1), f"code-block:{offset}", source_file))
    return results


# Ways a scripts/, references/ or assets/ path can appear in SKILL.md.
_FILE_REF_PATTERNS = [
    # Markdown links like [text](scripts/foo.py)
    re.compile(r"\[.*?\]\(((?:scripts/|references/|assets/)\S+)\)"),
    # Backtick paths like `scripts/foo.py`
    re.compile(r"`((?:scripts/|references/|assets/)\S+)`"),
    # Bare parenthesised mentions like (scripts/foo.py)
    re.compile(r"\(((?:scripts/|references/|assets/)\S+)\)"),
]


def detect_file_references(skill_md_content: str, source_file: str) -> list[dict[str, Any]]:
    """Scan SKILL.md for references to scripts/, references/ or assets/ paths.

    Entries default to status "ok"; existence is re-evaluated afterwards by
    check_file_refs_exist().
    """
    results: list[dict[str, Any]] = []
    seen: set[tuple[str, int]] = set()
    for line_num, line in enumerate(skill_md_content.splitlines(), 1):
        for pattern in _FILE_REF_PATTERNS:
            for m in pattern.finditer(line):
                ref_path = m.group(1).rstrip(".)`")
                # The markdown-link and bare-paren patterns both match
                # "[x](scripts/foo)"; keep one entry per (path, line).
                if (ref_path, line_num) in seen:
                    continue
                seen.add((ref_path, line_num))
                results.append({
                    "name": ref_path,
                    "line": line_num,
                    "file": source_file,
                    "status": "ok",  # re-evaluated after the full scan
                    "suggestion": "",
                })
    return results


KNOWN_CLI_TOOLS = [
    "gh", "jq", "ffmpeg", "ffprobe", "git", "python3", "python", "node",
    "npm", "npx", "docker", "docker-compose", "curl", "wget", "rsync",
    "trash", "sips", "convert", "magick", "identify", "pdftotext",
    "pdftoppm", "pdfunite", "qpdf", "exiftool", "sox", "youtube-dl",
    "yt-dlp", "grep", "sed", "awk", "sort", "uniq", "tee", "xargs", "find",
    "rg", "fzf", "bat", "delta", "tmux", "screen", "ssh", "scp",
    "tailscale", "systemctl", "journalctl", "cargo", "go", "ruby", "perl",
    "make", "cmake", "pip3", "pip", "poetry", "uv", "brew", "apt",
    "apt-get", "yum", "dnf", "pacman", "nix-env", "helm", "kubectl",
    "terraform", "ansible", "pulumi", "vault", "aws", "gcloud", "az",
    "psql", "mysql", "sqlite3", "redis-cli",
]

# Pre-compiled per-tool patterns, hoisted so they are not rebuilt for every
# (line, tool) pair.  The lookarounds reject matches embedded in a longer
# word-like token, so "docker" does not fire inside "docker-compose" and
# "pip" does not fire inside "pip3".
# NOTE(review): the original pattern text was corrupted in this copy of the
# file; this is a reconstruction — confirm against SPEC.md / git history.
_TOOL_PATTERNS = {
    tool: re.compile(rf"(?<![\w-]){re.escape(tool)}(?![\w-])")
    for tool in KNOWN_CLI_TOOLS
}


def detect_bin_deps(skill_md_content: str, source_file: str) -> list[dict[str, Any]]:
    """Scan SKILL.md for mentions of known CLI tools (one entry per tool).

    Each tool is reported once, at the first line where it appears.
    """
    results: list[dict[str, Any]] = []
    seen: set[str] = set()
    for line_num, line in enumerate(skill_md_content.splitlines(), 1):
        for tool, pattern in _TOOL_PATTERNS.items():
            if tool in seen:
                continue
            if pattern.search(line):
                seen.add(tool)
                results.append({
                    "name": tool,
                    "line": line_num,
                    "file": source_file,
                    "status": "warn",
                    "suggestion": f"CLI tool '{tool}' must be available on PATH",
                })
    return results


# Skill references in body text: "see/using/via/with [the] <name> skill" and
# "`<name>` skill".  The article is genuinely optional ("via pdf skill"
# matches); the original's "the?" made at least "th" mandatory.
_SKILL_REF_PATTERNS = [
    re.compile(r"(?:see|using|via|with)\s+(?:the\s+)?([a-z][a-z0-9-]+)\s+skill", re.IGNORECASE),
    re.compile(r"`([a-z][a-z0-9-]+)`\s+skill", re.IGNORECASE),
]


def detect_skill_deps(skill_md_content: str, source_file: str) -> list[dict[str, Any]]:
    """Scan SKILL.md body text for references to other skills."""
    results: list[dict[str, Any]] = []
    for line_num, line in enumerate(skill_md_content.splitlines(), 1):
        for pattern in _SKILL_REF_PATTERNS:
            for m in pattern.finditer(line):
                skill_name = m.group(1).lower().strip()
                # "see the skill" would otherwise capture the article itself.
                if skill_name in ("the", "a", "an"):
                    continue
                results.append({
                    "name": skill_name,
                    "line": line_num,
                    "file": source_file,
                    "status": "warn",
                    "suggestion": f"Referenced skill '{skill_name}' must be installed",
                })
    return results


# ── Path validation ────────────────────────────────────────────────────────


def validate_existing_paths(results: list[dict[str, Any]], skill_dir: Path) -> None:
    """Deprecated no-op kept for backward compatibility.

    File-reference validation is done per category by check_file_refs_exist();
    this function intentionally does nothing.
    """


def check_file_refs_exist(entries: list[dict[str, Any]], skill_dir: Path) -> None:
    """Mark each file-reference entry "ok" or "error" by checking the disk.

    Mutates `entries` in place.  is_symlink() is checked as well because
    exists() reports a broken symlink as missing, yet the reference target
    is present in the skill directory.
    """
    for entry in entries:
        ref_path = entry["name"]
        full_path = skill_dir / ref_path
        if full_path.exists() or full_path.is_symlink():
            entry["status"] = "ok"
            entry["suggestion"] = ""
        else:
            entry["status"] = "error"
            entry["suggestion"] = f"Referenced path '{ref_path}' does not exist in skill directory"
# ── Main scan ──────────────────────────────────────────────────────────────


def _dedupe(entries: list[dict[str, Any]], key_fields: tuple[str, ...]) -> list[dict[str, Any]]:
    """Return `entries` with duplicates removed, keeping the first occurrence.

    Two entries are duplicates when they agree on every field in `key_fields`.
    """
    seen: set[tuple[Any, ...]] = set()
    deduped: list[dict[str, Any]] = []
    for entry in entries:
        key = tuple(entry[field] for field in key_fields)
        if key not in seen:
            seen.add(key)
            deduped.append(entry)
    return deduped


def scan_skill(skill_dir: Path) -> dict[str, Any]:
    """Scan a skill directory and return the dependency manifest dict.

    Collects Python imports from all *.py files (recursively) and from
    SKILL.md code blocks, plus file references, CLI tool mentions and skill
    references from SKILL.md.  A missing or unreadable SKILL.md is treated
    as empty rather than an error.
    """
    imports: list[dict[str, Any]] = []
    file_refs: list[dict[str, Any]] = []
    bin_deps: list[dict[str, Any]] = []
    skill_deps: list[dict[str, Any]] = []

    # All .py files in the skill directory and its subdirectories.
    for py_file in skill_dir.rglob("*.py"):
        imports.extend(detect_python_imports(py_file))

    content = ""
    skill_md = skill_dir / "SKILL.md"
    if skill_md.exists():
        try:
            content = skill_md.read_text(encoding="utf-8", errors="replace")
        except OSError:
            content = ""

    if content:
        imports.extend(detect_imports_from_code_blocks(content, "SKILL.md"))
        file_refs = detect_file_references(content, "SKILL.md")
        bin_deps = detect_bin_deps(content, "SKILL.md")
        skill_deps = detect_skill_deps(content, "SKILL.md")

    # Re-evaluate the default "ok" status against what is actually on disk.
    if file_refs:
        check_file_refs_exist(file_refs, skill_dir)

    return {
        "manifestVersion": "1.0",
        "skillDir": str(skill_dir.resolve()),
        "imports": _dedupe(imports, ("name", "file")),
        "fileRefs": file_refs,
        "binDeps": _dedupe(bin_deps, ("name",)),
        "skillDeps": _dedupe(skill_deps, ("name",)),
    }


# ── Pretty print ───────────────────────────────────────────────────────────

# Human-readable labels and status icons, hoisted so they are not rebuilt
# for every category.
_CATEGORY_LABELS = {
    "imports": "Python Imports",
    "fileRefs": "File References",
    "binDeps": "CLI Tools",
    "skillDeps": "Skill Dependencies",
}
_STATUS_ICONS = {"ok": "✓", "warn": "⚠", "error": "✗"}


def print_tree(manifest: dict[str, Any]) -> None:
    """Pretty-print the manifest as a CLI tree (empty categories skipped)."""
    print(f"\n{'=' * 60}")
    print(f" Dependency Manifest: {manifest['skillDir']}")
    print(f" Version: {manifest['manifestVersion']}")
    print(f"{'=' * 60}")
    for category in CATEGORIES:
        entries = manifest.get(category, [])
        if not entries:
            continue
        label = _CATEGORY_LABELS.get(category, category)
        print(f"\n 📦 {label} ({len(entries)})")
        print(f" {'─' * 58}")
        for entry in entries:
            status_icon = _STATUS_ICONS.get(entry.get("status", ""), "?")
            print(f"   {status_icon} {entry['name']} (line {entry.get('line', '?')})")
            suggestion = entry.get("suggestion", "")
            if suggestion:
                print(f"       → {suggestion}")


# ── Entry point ────────────────────────────────────────────────────────────


def main() -> None:
    """CLI entry point: parse arguments, scan, and emit the manifest.

    Exits with status 1 when the given path is not a directory.
    """
    parser = argparse.ArgumentParser(
        description="Generate dependency manifest for a skill directory",
    )
    parser.add_argument("skill_dir", help="Path to skill directory")
    parser.add_argument(
        "--tree",
        action="store_true",
        help="Print dependency tree to stdout instead of writing manifest.json",
    )
    parser.add_argument(
        "--stdout",
        action="store_true",
        help="Print JSON to stdout instead of writing manifest.json",
    )
    args = parser.parse_args()

    skill_dir = Path(args.skill_dir).resolve()
    if not skill_dir.is_dir():
        print(f"[ERROR] Not a directory: {skill_dir}", file=sys.stderr)
        sys.exit(1)

    manifest = scan_skill(skill_dir)

    if args.tree:
        print_tree(manifest)
    elif args.stdout:
        print(json.dumps(manifest, indent=2))
    else:
        output_path = skill_dir / "dependency-manifest.json"
        # Explicit UTF-8 so the manifest bytes do not depend on the
        # platform's default locale encoding.
        output_path.write_text(json.dumps(manifest, indent=2), encoding="utf-8")
        print(f"[OK] Wrote dependency manifest to {output_path}")
        # Per-category summary.
        total = sum(len(manifest.get(c, [])) for c in CATEGORIES)
        print(f" Found {total} total dependency entries:")
        for cat in CATEGORIES:
            print(f"   {cat}: {len(manifest.get(cat, []))}")


if __name__ == "__main__":
    main()