#!/usr/bin/env python3

"""
Skill Dependency Manifest Generator

Scans a skill directory and generates a dependency manifest JSON (dependency-manifest.json)
detecting Python imports, file references, CLI tool dependencies, and skill dependencies.

Usage:
    dep_manifest.py <skill_dir>            # Write dependency-manifest.json
    dep_manifest.py <skill_dir> --tree     # CLI pretty-print instead
    dep_manifest.py <skill_dir> --stdout   # Print JSON to stdout
"""

import argparse
import json
import re
import sys
from pathlib import Path
from typing import Any

# ── Constants ──────────────────────────────────────────────────────────────

# Shared constants for output schema (matches SPEC.md contract)

# Manifest section keys, listed in the order they are reported.
CATEGORIES = [
    "imports",
    "fileRefs",
    "binDeps",
    "skillDeps",
]

# Top-level names of standard-library modules. An import whose first dotted
# component is in this set is classified as "stdlib"; anything else is
# reported as third-party.
PYTHON_STDLIB = {
    "abc", "aifc", "argparse", "array", "ast", "asynchat", "asyncio",
    "asyncore", "atexit", "audioop", "base64", "bdb", "binascii", "binhex",
    "bisect", "builtins", "bz2", "calendar", "cgi", "cgitb", "chunk",
    "cmath", "cmd", "code", "codecs", "codeop", "collections", "colorsys",
    "compileall", "concurrent", "configparser", "contextlib", "contextvars",
    "copy", "copyreg", "cProfile", "crypt", "csv", "ctypes", "curses",
    "dataclasses", "datetime", "dbm", "decimal", "difflib", "dis",
    "distutils", "doctest", "email", "encodings", "enum", "errno",
    "faulthandler", "fcntl", "filecmp", "fileinput", "fnmatch", "fractions",
    "ftplib", "functools", "gc", "getopt", "getpass", "gettext", "glob",
    "graphlib", "grp", "gzip", "hashlib", "heapq", "hmac", "html", "http",
    "idlelib", "imaplib", "imghdr", "imp", "importlib", "inspect", "io",
    "ipaddress", "itertools", "json", "keyword", "lib2to3", "linecache",
    "locale", "logging", "lzma", "mailbox", "mailcap", "marshal", "math",
    "mimetypes", "mmap", "modulefinder", "multiprocessing", "netrc", "nis",
    "nntplib", "numbers", "operator", "optparse", "os", "ossaudiodev",
    "pathlib", "pdb", "pickle", "pickletools", "pipes", "pkgutil",
    "platform", "plistlib", "poplib", "posix", "posixpath", "pprint",
    "profile", "pstats", "pty", "pwd", "py_compile", "pyclbr",
    "pydoc", "queue", "quopri", "random", "re", "readline", "reprlib",
    "resource", "rlcompleter", "runpy", "sched", "secrets", "select",
    "selectors", "shelve", "shlex", "shutil", "signal", "site", "smtpd",
    "smtplib", "sndhdr", "socket", "socketserver", "sqlite3", "ssl",
    "stat", "statistics", "string", "stringprep", "struct", "subprocess",
    "sunau", "symtable", "sys", "sysconfig", "syslog", "tabnanny",
    "tarfile", "telnetlib", "tempfile", "termios", "test", "textwrap",
    "threading", "time", "timeit", "tkinter", "token", "tokenize",
    "tomllib", "trace", "traceback", "tracemalloc", "tty", "turtle",
    "turtledemo", "types", "typing", "unicodedata", "unittest", "urllib",
    "uu", "uuid", "venv", "warnings", "wave", "weakref", "webbrowser",
    "winreg", "winsound", "wsgiref", "xdrlib", "xml", "xmlrpc",
    "zipapp", "zipfile", "zipimport", "zlib",
    # Importable stdlib modules the list above left out. Without these,
    # e.g. ``from __future__ import annotations`` was flagged third-party.
    "__future__", "_thread", "antigravity", "ensurepip", "genericpath",
    "msilib", "msvcrt", "ntpath", "nturl2path", "opcode", "pydoc_data",
    "spwd", "sre_compile", "sre_constants", "sre_parse", "this", "zoneinfo",
}
# Python 3.10+ publishes the interpreter's own list of stdlib module names;
# merge it in when present so the hand-maintained set cannot fall behind.
PYTHON_STDLIB |= set(getattr(sys, "stdlib_module_names", ()))

# ── Detectors ──────────────────────────────────────────────────────────────

# ``import a``, ``import a.b as c`` or ``import a, b``; the captured clause is
# everything after the keyword and may name several modules.
_IMPORT_CLAUSE_RE = re.compile(r"^\s*import\s+(.+)")
# ``from package.module import ...``. Relative imports start with a dot and
# therefore never match.
_FROM_IMPORT_RE = re.compile(r"^\s*from\s+([a-zA-Z_][a-zA-Z0-9_.]*)\s+import")
# First module of an import clause (prefix match only).
_LEADING_MODULE_RE = re.compile(r"\s*([a-zA-Z_][a-zA-Z0-9_.]*)")
# Second and later modules must be exactly ``name`` or ``name as alias`` so
# that prose such as "import foo, which is nice" does not add bogus entries.
_EXTRA_MODULE_RE = re.compile(
    r"\s*([a-zA-Z_][a-zA-Z0-9_.]*)(?:\s+as\s+[a-zA-Z_][a-zA-Z0-9_]*)?\s*;?\s*$"
)


def _imported_module_names(line: str) -> list[str]:
    """Return the dotted module names imported by one source line."""
    from_match = _FROM_IMPORT_RE.match(line)
    if from_match:
        return [from_match.group(1)]

    import_match = _IMPORT_CLAUSE_RE.match(line)
    if not import_match:
        return []

    # Drop a trailing comment so commas inside it are not taken for modules.
    clause = import_match.group(1).split("#", 1)[0]
    first, *rest = clause.split(",")

    leading = _LEADING_MODULE_RE.match(first)
    if not leading:
        return []

    names = [leading.group(1)]
    for part in rest:
        extra = _EXTRA_MODULE_RE.match(part)
        if extra:
            names.append(extra.group(1))
    return names


def detect_python_imports(file_path: Path) -> list[dict[str, Any]]:
    """Scan a .py file for import statements.

    The scan is line-based (no parsing), so an import-looking line inside a
    string or docstring is reported as well.

    Args:
        file_path: Path of the Python file to read.

    Returns:
        One entry per imported module with the keys ``name``, ``line``,
        ``file``, ``type``, ``status`` and ``suggestion``. An unreadable file
        yields an empty list.
    """
    results: list[dict[str, Any]] = []
    try:
        content = file_path.read_text(encoding="utf-8", errors="replace")
    except OSError:
        return results

    # Reported as "<parent directory>/<file name>".
    display_path = str(file_path.relative_to(file_path.parent.parent))

    for line_num, line in enumerate(content.splitlines(), 1):
        for mod_name in _imported_module_names(line):
            # Classification is decided by the top-level package only.
            top_mod = mod_name.split(".")[0]
            in_stdlib = top_mod in PYTHON_STDLIB
            results.append({
                "name": mod_name,
                "line": line_num,
                "file": display_path,
                "type": "stdlib" if in_stdlib else "third-party",
                "status": "ok" if in_stdlib else "warn",
                "suggestion": "" if in_stdlib else f"Third-party module '{top_mod}' must be installed",
            })
    return results

def detect_imports_from_code_blocks(content: str, source_file: str) -> list[dict[str, Any]]:
    """Scan fenced code blocks in markdown for Python imports.

    Fences are tracked line by line so that a closing fence is never taken
    for the opening of a new block. Blocks tagged ``python``, ``py``,
    ``bash``, ``sh`` or ``shell`` and untagged blocks are scanned; blocks in
    any other language are skipped, as are blocks that are never closed.

    Args:
        content: Full markdown text.
        source_file: Name recorded in each entry's ``file`` field.

    Returns:
        One entry per import found. ``line`` is ``"code-block:N"`` where N is
        the 1-based line offset inside the enclosing code block.
    """
    import_patterns = (
        re.compile(r"^\s*import\s+([a-zA-Z_][a-zA-Z0-9_.]*)"),
        re.compile(r"^\s*from\s+([a-zA-Z_][a-zA-Z0-9_.]*)\s+import"),
    )
    scanned_languages = {"", "python", "py", "bash", "sh", "shell"}

    results: list[dict[str, Any]] = []
    pending: list[dict[str, Any]] = []  # hits of the block currently open
    inside_block = False
    scanning = False
    offset = 0

    for line in content.splitlines():
        stripped = line.lstrip()
        if stripped.startswith("```"):
            if inside_block:
                # Closing fence: the block is complete, keep its hits.
                results.extend(pending)
                inside_block = False
            else:
                info = stripped[3:].strip().lower()
                language = info.split()[0] if info else ""
                inside_block = True
                scanning = language in scanned_languages
                offset = 0
            pending = []
            continue

        if not (inside_block and scanning):
            continue

        offset += 1
        for pattern in import_patterns:
            m = pattern.match(line)
            if not m:
                continue
            mod_name = m.group(1)
            top_mod = mod_name.split(".")[0]
            in_stdlib = top_mod in PYTHON_STDLIB
            pending.append({
                "name": mod_name,
                "line": f"code-block:{offset}",
                "file": source_file,
                "type": "stdlib" if in_stdlib else "third-party",
                "status": "ok" if in_stdlib else "warn",
                "suggestion": "" if in_stdlib else f"Third-party module '{top_mod}' must be installed",
            })
    return results

def detect_file_references(skill_md_content: str, source_file: str) -> list[dict[str, Any]]:
    """Scan SKILL.md for file/directory references (paths to scripts/, references/, assets/).

    A single pattern covers markdown link targets (``[text](scripts/foo.py)``),
    backtick-quoted paths, parenthesised mentions and bare mentions. A path
    is only recognised when it is not the tail of a longer path or word, so
    ``myscripts/x`` and ``https://host/assets/x`` are ignored.

    Args:
        skill_md_content: Full text of the markdown file.
        source_file: Name recorded in each entry's ``file`` field.

    Returns:
        One entry per distinct path per line, with the keys ``name``,
        ``line``, ``file``, ``status`` and ``suggestion``. ``status`` is
        provisionally ``"ok"``.
    """
    # The lookbehind rejects matches that continue a longer token. The
    # character class stops at whitespace and at markdown/quoting delimiters.
    ref_pattern = re.compile(
        r"""(?<![\w./-])((?:scripts|references|assets)/[^\s`"'()\[\]<>]+)"""
    )

    results: list[dict[str, Any]] = []
    for line_num, line in enumerate(skill_md_content.splitlines(), 1):
        seen_on_line: set[str] = set()
        for m in ref_pattern.finditer(line):
            # Sentence punctuation right after a path is not part of it.
            ref_path = m.group(1).rstrip(".,;:!?")
            if ref_path in seen_on_line:
                continue
            seen_on_line.add(ref_path)
            results.append({
                "name": ref_path,
                "line": line_num,
                "file": source_file,
                "status": "ok",  # Will be re-evaluated after full scan
                "suggestion": "",
            })

    return results

# Executables recognised when SKILL.md mentions them. The order is kept
# because tools are looked up in this sequence.
KNOWN_CLI_TOOLS = """
    gh jq ffmpeg ffprobe git python3 python node
    npm npx docker docker-compose curl wget rsync
    trash sips convert magick identify pdftotext
    pdftoppm pdfunite qpdf exiftool sox youtube-dl
    yt-dlp grep sed awk sort uniq tee xargs
    find rg fzf bat delta tmux screen ssh
    scp tailscale systemctl journalctl cargo go
    ruby perl make cmake pip3 pip poetry uv
    brew apt apt-get yum dnf pacman nix-env
    helm kubectl terraform ansible pulumi vault
    aws gcloud az psql mysql sqlite3 redis-cli
""".split()

def detect_bin_deps(skill_md_content: str, source_file: str) -> list[dict[str, Any]]:
    """Scan SKILL.md for mentions of CLI tools.

    Each tool in ``KNOWN_CLI_TOOLS`` is reported once, at the first line that
    mentions it as a standalone token.

    Args:
        skill_md_content: Full text of the markdown file.
        source_file: Name recorded in each entry's ``file`` field.

    Returns:
        One ``warn`` entry per detected tool. Tools first seen on the same
        line appear in ``KNOWN_CLI_TOOLS`` order.
    """
    results: list[dict[str, Any]] = []
    if not skill_md_content:
        return results

    # Compile once per call instead of once per (line, tool) pair.
    # A tool must not be glued to a word, path or identifier character on
    # either side. A trailing "." is accepted unless it starts an extension
    # or attribute, so "Requires ffmpeg." is detected but "git.py" is not.
    tool_patterns = [
        (
            tool,
            re.compile(
                rf"(?<![a-zA-Z0-9_./-]){re.escape(tool)}(?![a-zA-Z0-9_/-]|\.[a-zA-Z0-9_])"
            ),
        )
        for tool in KNOWN_CLI_TOOLS
    ]

    seen: set[str] = set()
    for line_num, line in enumerate(skill_md_content.splitlines(), 1):
        for tool, pattern in tool_patterns:
            if tool in seen or not pattern.search(line):
                continue
            seen.add(tool)
            results.append({
                "name": tool,
                "line": line_num,
                "file": source_file,
                "status": "warn",
                "suggestion": f"CLI tool '{tool}' must be installed on the system",
            })
    return results

# "<name> skill", optionally with closing markdown quoting around the name
# (e.g. "the `pdf` skill"). Matching is case-insensitive.
_SKILL_REF_RE = re.compile(
    r"""\b([a-z][a-z0-9-]+)[`'"*]{0,2}\s+skill\b""",
    re.IGNORECASE,
)

# Words that commonly precede "skill" without naming one ("this skill", ...).
_NON_SKILL_WORDS = frozenset({
    "an", "the", "this", "that", "these", "those", "each", "every", "any",
    "some", "all", "no", "one", "another", "other", "new", "same", "your",
    "our", "my", "their", "its", "which", "what", "and", "or", "of", "for",
    "to", "in", "is", "as",
})


def detect_skill_deps(skill_md_content: str, source_file: str) -> list[dict[str, Any]]:
    """Scan SKILL.md body text for references to other skills.

    A reference is any phrase of the form ``<name> skill`` (for example
    "see the pdf-tools skill") whose ``<name>`` is not a common determiner or
    function word. Names are lower-cased; each is reported once, at its
    first mention.

    Args:
        skill_md_content: Full text of the markdown file.
        source_file: Name recorded in each entry's ``file`` field.

    Returns:
        One ``warn`` entry per distinct referenced skill name.
    """
    results: list[dict[str, Any]] = []
    seen: set[str] = set()

    for line_num, line in enumerate(skill_md_content.splitlines(), 1):
        for m in _SKILL_REF_RE.finditer(line):
            skill_name = m.group(1).lower().strip()
            if skill_name in _NON_SKILL_WORDS or skill_name in seen:
                continue
            seen.add(skill_name)
            results.append({
                "name": skill_name,
                "line": line_num,
                "file": source_file,
                "status": "warn",
                "suggestion": f"Referenced skill '{skill_name}' must be installed",
            })
    return results

# ── Path validation ────────────────────────────────────────────────────────

def validate_existing_paths(results: list[dict[str, Any]], skill_dir: Path) -> None:
    """Placeholder hook for path validation; currently does nothing.

    No entry is inspected or modified. File references are validated per
    category by ``check_file_refs_exist``, which the caller invokes itself.

    Args:
        results: Manifest entries (left untouched).
        skill_dir: Skill directory (unused).
    """
    # Intentionally empty: validation is done in the caller so that only the
    # fileRefs category is checked against the filesystem.
    return None

def check_file_refs_exist(entries: list[dict[str, Any]], skill_dir: Path) -> None:
    """Check if referenced files actually exist in the skill directory.

    Each entry's ``status`` and ``suggestion`` are updated in place: ``"ok"``
    with an empty suggestion when the path exists (a symlink counts even if
    its target is missing), ``"error"`` with an explanatory suggestion
    otherwise. A URL-style ``#fragment`` or ``?query`` suffix, as found in
    markdown link targets such as ``references/api.md#auth``, is ignored if
    the path as written does not exist.

    Args:
        entries: File-reference entries; each must have a ``name`` key.
        skill_dir: Directory the referenced paths are relative to.
    """
    for entry in entries:
        ref_path = entry["name"]

        # Try the reference verbatim first: "#" and "?" are legal in file names.
        candidates = [ref_path]
        without_suffix = ref_path.split("#", 1)[0].split("?", 1)[0]
        if without_suffix and without_suffix != ref_path:
            candidates.append(without_suffix)

        found = False
        for candidate in candidates:
            full_path = skill_dir / candidate
            if full_path.exists() or full_path.is_symlink():
                found = True
                break

        if found:
            entry["status"] = "ok"
            entry["suggestion"] = ""
        else:
            entry["status"] = "error"
            entry["suggestion"] = f"Referenced path '{ref_path}' does not exist in skill directory"

# ── Main scan ──────────────────────────────────────────────────────────────

def _dedupe_entries(entries: list[dict[str, Any]], key_fields: tuple[str, ...]) -> list[dict[str, Any]]:
    """Return *entries* without repeats, keeping the first entry per key."""
    seen: set[tuple[Any, ...]] = set()
    unique: list[dict[str, Any]] = []
    for entry in entries:
        key = tuple(entry[field] for field in key_fields)
        if key in seen:
            continue
        seen.add(key)
        unique.append(entry)
    return unique


def scan_skill(skill_dir: Path) -> dict[str, Any]:
    """Scan a skill directory and return the dependency manifest.

    Every ``*.py`` file below *skill_dir* is scanned for imports, and
    ``SKILL.md`` (if readable) is scanned for code-block imports, file
    references, CLI tools and skill references.

    Args:
        skill_dir: Root directory of the skill.

    Returns:
        A dict with ``manifestVersion``, ``skillDir`` and the four entry
        lists ``imports``, ``fileRefs``, ``binDeps`` and ``skillDeps``.
    """
    imports: list[dict[str, Any]] = []
    file_refs: list[dict[str, Any]] = []
    bin_deps: list[dict[str, Any]] = []
    skill_deps: list[dict[str, Any]] = []

    # Sorted so the manifest does not depend on filesystem enumeration order.
    for py_file in sorted(skill_dir.rglob("*.py")):
        # Record the path relative to the skill root with forward slashes,
        # so nested and top-level files are identified unambiguously.
        relative_name = py_file.relative_to(skill_dir).as_posix()
        for entry in detect_python_imports(py_file):
            entry["file"] = relative_name
            imports.append(entry)

    # A missing or unreadable SKILL.md is treated as empty.
    try:
        content = (skill_dir / "SKILL.md").read_text(encoding="utf-8", errors="replace")
    except OSError:
        content = ""

    if content:
        imports.extend(detect_imports_from_code_blocks(content, "SKILL.md"))
        file_refs = detect_file_references(content, "SKILL.md")
        bin_deps = detect_bin_deps(content, "SKILL.md")
        skill_deps = detect_skill_deps(content, "SKILL.md")

    # Drop repeats before touching the filesystem; the same reference on the
    # same line is a single finding.
    file_refs = _dedupe_entries(file_refs, ("name", "line"))
    if file_refs:
        check_file_refs_exist(file_refs, skill_dir)

    return {
        "manifestVersion": "1.0",
        "skillDir": str(skill_dir.resolve()),
        "imports": _dedupe_entries(imports, ("name", "file")),
        "fileRefs": file_refs,
        "binDeps": _dedupe_entries(bin_deps, ("name",)),
        "skillDeps": _dedupe_entries(skill_deps, ("name",)),
    }

# ── Pretty print ───────────────────────────────────────────────────────────

# Human-readable heading for each manifest category.
_CATEGORY_LABELS = {
    "imports": "Python Imports",
    "fileRefs": "File References",
    "binDeps": "CLI Tools",
    "skillDeps": "Skill Dependencies",
}

# Marker printed in front of an entry, keyed by its status.
_STATUS_ICONS = {"ok": "✓", "warn": "⚠", "error": "✗"}


def print_tree(manifest: dict[str, Any]) -> None:
    """Pretty-print the manifest as a CLI tree.

    Prints a banner with the skill directory and manifest version, then one
    section per non-empty category in ``CATEGORIES`` order. Each entry shows
    a status icon, its name and line, and its suggestion when there is one.

    Args:
        manifest: Manifest dict containing ``skillDir``, ``manifestVersion``
            and the category lists.
    """
    banner = "=" * 60
    print(f"\n{banner}")
    print(f" Dependency Manifest: {manifest['skillDir']}")
    print(f" Version: {manifest['manifestVersion']}")
    print(banner)

    for category in CATEGORIES:
        entries = manifest.get(category, [])
        if not entries:
            continue

        # Unknown categories fall back to their raw key.
        label = _CATEGORY_LABELS.get(category, category)
        print(f"\n  📦 {label} ({len(entries)})")
        print(f"  {'─' * 58}")

        for entry in entries:
            status_icon = _STATUS_ICONS.get(entry.get("status", ""), "?")
            name = entry["name"]
            line = entry.get("line", "?")
            suggestion = entry.get("suggestion", "")
            print(f"    {status_icon} {name}  (line {line})")
            if suggestion:
                print(f"       → {suggestion}")

# ── Entry point ────────────────────────────────────────────────────────────

def main() -> None:
    """Command-line entry point.

    Scans the given skill directory, then either pretty-prints the result
    (``--tree``), prints it as JSON (``--stdout``), or writes it to
    ``dependency-manifest.json`` inside the skill directory and prints a
    per-category summary. ``--tree`` takes precedence if both flags are given.

    Exits with status 1 when the path is not a directory or the manifest
    file cannot be written.
    """
    parser = argparse.ArgumentParser(
        description="Generate dependency manifest for a skill directory",
    )
    parser.add_argument("skill_dir", help="Path to skill directory")
    parser.add_argument(
        "--tree",
        action="store_true",
        help="Print dependency tree to stdout instead of writing dependency-manifest.json",
    )
    parser.add_argument(
        "--stdout",
        action="store_true",
        help="Print JSON to stdout instead of writing dependency-manifest.json",
    )
    args = parser.parse_args()

    skill_dir = Path(args.skill_dir).resolve()
    if not skill_dir.is_dir():
        print(f"[ERROR] Not a directory: {skill_dir}", file=sys.stderr)
        sys.exit(1)

    manifest = scan_skill(skill_dir)

    if args.tree:
        print_tree(manifest)
        return

    payload = json.dumps(manifest, indent=2)
    if args.stdout:
        print(payload)
        return

    output_path = skill_dir / "dependency-manifest.json"
    try:
        # Explicit encoding so the file does not depend on the platform default.
        output_path.write_text(payload, encoding="utf-8")
    except OSError as exc:
        print(f"[ERROR] Could not write {output_path}: {exc}", file=sys.stderr)
        sys.exit(1)

    print(f"[OK] Wrote dependency manifest to {output_path}")
    # Summarize
    counts = {cat: len(manifest.get(cat, [])) for cat in CATEGORIES}
    print(f"    Found {sum(counts.values())} total dependency entries:")
    for cat, count in counts.items():
        print(f"      {cat}: {count}")

# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()