📄 dep_manifest.py 11,960 bytes Tuesday 01:22 📋 Raw

#!/usr/bin/env python3

"""
Skill Dependency Manifest Generator.

Scans a skill directory (SKILL.md + .py files) and produces a structured
JSON manifest of all dependencies: Python imports, CLI binaries, file
references, and referenced skill names.

Usage:
    dep_manifest.py <skill_dir>                  # Writes dependency-manifest.json to skill root
    dep_manifest.py <skill_dir> --stdout         # JSON to stdout
    dep_manifest.py <skill_dir> --tree           # Pretty-print tree to stdout
    dep_manifest.py <skill_dir> --tree --stdout  # Pretty-print tree to stdout
"""

import argparse
import ast
import json
import re
import sys
from pathlib import Path

# ── Helpers ──────────────────────────────────────────────────────────────

# Top-level names of standard-library modules, used to tell stdlib imports
# apart from third-party ones.  The hand-maintained list is the baseline; on
# interpreters that provide ``sys.stdlib_module_names`` (Python 3.10+) it is
# merged with that authoritative set so stdlib modules missing from the list
# are not reported as third-party.  ``__future__`` is listed explicitly so
# ``from __future__ import ...`` is recognised on older interpreters too.
STDLIB_MODULES: set[str] = {
    "__future__",
    "abc", "aifc", "argparse", "array", "ast", "asynchat", "asyncio",
    "asyncore", "atexit", "audioop", "base64", "bdb", "binascii", "binhex",
    "bisect", "builtins", "bz2", "calendar", "cgi", "cgitb", "chunk",
    "cmath", "cmd", "code", "codecs", "codeop", "collections", "colorsys",
    "compileall", "concurrent", "configparser", "contextlib", "contextvars",
    "copy", "copyreg", "cProfile", "crypt", "csv", "ctypes", "curses",
    "dataclasses", "datetime", "dbm", "decimal", "difflib", "dis",
    "distutils", "doctest", "email", "encodings", "enum", "errno",
    "faulthandler", "fcntl", "filecmp", "fileinput", "fnmatch", "fractions",
    "ftplib", "functools", "gc", "getopt", "getpass", "gettext", "glob",
    "graphlib", "grp", "gzip", "hashlib", "heapq", "hmac", "html", "http",
    "idlelib", "imaplib", "imghdr", "imp", "importlib", "inspect", "io",
    "ipaddress", "itertools", "json", "keyword", "lib2to3", "linecache",
    "locale", "logging", "lzma", "mailbox", "mailcap", "marshal", "math",
    "mimetypes", "mmap", "modulefinder", "multiprocessing", "netrc", "nis",
    "nntplib", "numbers", "operator", "optparse", "os", "ossaudiodev",
    "pathlib", "pdb", "pickle", "pickletools", "pipes", "pkgutil",
    "platform", "plistlib", "poplib", "posix", "posixpath", "pprint",
    "profile", "pstats", "pty", "pwd", "py_compile", "pyclbr", "pydoc",
    "queue", "quopri", "random", "re", "readline", "reprlib", "resource",
    "rlcompleter", "runpy", "sched", "secrets", "select", "selectors",
    "shelve", "shlex", "shutil", "signal", "site", "smtpd", "smtplib",
    "sndhdr", "socket", "socketserver", "sqlite3", "ssl", "stat", "statistics",
    "string", "stringprep", "struct", "subprocess", "sunau", "symtable",
    "sys", "sysconfig", "syslog", "tabnanny", "tarfile", "tempfile",
    "termios", "test", "textwrap", "threading", "time", "timeit", "tkinter",
    "token", "tokenize", "tomllib", "trace", "traceback", "tracemalloc",
    "tty", "turtle", "turtledemo", "types", "typing", "unicodedata",
    "unittest", "urllib", "uu", "uuid", "venv", "warnings", "wave",
    "weakref", "webbrowser", "winreg", "winsound", "wsgiref", "xdrlib",
    "xml", "xmlrpc", "zipapp", "zipfile", "zipimport", "zlib", "zoneinfo",
} | set(getattr(sys, "stdlib_module_names", ()))

def _top_level_module(name: str) -> str:
    """Return the top-level module name (e.g. 'os.path' -> 'os').

    Args:
        name: A dotted module path.

    Returns:
        The text before the first dot, or ``name`` unchanged when it
        contains no dot.
    """
    return name.partition(".")[0]

def _scan_py_imports(file_path: Path) -> list[dict]:
    """Parse a .py file and return one manifest entry per absolute import.

    Args:
        file_path: Path of the Python source file to analyse.

    Returns:
        A list of dicts with the keys ``name``, ``sourceRef``, ``status`` and
        ``suggestion``.  An import whose top-level module is in
        ``STDLIB_MODULES`` gets status ``"ok"`` and no suggestion; any other
        import gets ``"warn"`` plus a hint to add it to the requirements.
        When the file cannot be read or parsed, the list contains a single
        ``"error"`` entry with an empty name instead.

    Relative imports (``from . import x``, ``from .pkg import y``) are not
    reported: they point at modules relative to the importing file, not at
    installable packages, so flagging them as third-party would be wrong.
    """
    try:
        tree = ast.parse(file_path.read_text(encoding="utf-8"))
    except (SyntaxError, ValueError, OSError) as exc:
        # ValueError covers UnicodeDecodeError for files that are not valid
        # UTF-8; OSError covers unreadable files.  One bad file must not
        # abort the scan of the remaining files.
        reason = "Syntax error in" if isinstance(exc, SyntaxError) else "Could not parse"
        return [{
            "name": "",
            "sourceRef": str(file_path.relative_to(file_path.parents[1])
                             if len(file_path.parents) > 1 else file_path.name),
            "status": "error",
            "suggestion": f"{reason} {file_path.name}: {exc}",
        }]

    def _entry(module: str) -> dict:
        """Build the manifest entry for one imported module path."""
        top = _top_level_module(module)
        is_stdlib = top in STDLIB_MODULES
        return {
            "name": module,
            "sourceRef": str(file_path),
            "status": "ok" if is_stdlib else "warn",
            "suggestion": None if is_stdlib
            else f"Third-party module '{top}' -- ensure it's in requirements",
        }

    entries: list[dict] = []
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            entries.extend(_entry(alias.name) for alias in node.names)
        elif isinstance(node, ast.ImportFrom) and node.module and not node.level:
            # ``level`` > 0 marks a relative import; see the docstring.
            entries.append(_entry(node.module))

    return entries

# ── SKILL.md scanners ───────────────────────────────────────────────────

# Case-insensitive whole-word matcher for the CLI tools a SKILL.md may
# mention.  Alternation is tried left to right and "-" ends a word, so a
# hyphenated name must come before its own prefix: "docker-compose" is listed
# ahead of "docker", otherwise only "docker" would ever match.
CLI_BIN_PATTERNS = re.compile(
    r"(?im)\b(gh|jq|ffmpeg|ffprobe|curl|wget|sed|awk|grep|find|xargs|tr|sort|uniq|"
    r"python3?|node|npm|npx|yarn|docker-compose|docker|make|cmake|git|"
    r"rsync|scp|ssh|tmux|screen|tee|cat|head|tail|less|more|"
    r"openclaw|clawhub|mcporter|himalaya|blu|songsee|gifgrep|"
    r"xurl|ordercli|eightctl|wacli|whisper|"
    r"brew|apt|pip|pip3|gem|cargo|go|rustc|"
    r"openssl|gpg|age|sops|vault|"
    r"convert|magick|identify|sox|"
    r"sqlite3|psql|mysql|redis-cli|"
    r"pdftotext|pdftoppm|pdfinfo|mutool|qpdf|"
    r"pandoc|wkhtmltopdf|chromium|firefox)\b",
)

# Matches phrases such as "skill dependency: foo-bar" or "skill name 'foo'"
# and captures the referenced skill name in group 1.  The name is one letter
# followed by any number of letters, digits or hyphens; the separator after
# the keyword is an optional ":" or "-", optional whitespace and an optional
# opening quote.
SKILL_DEP_PATTERN = re.compile(
    r"(?im)(?:skill\s+(?:reference|dependency|depends?\b|name)"
    r"s?\b[:-]?\s*['\"]?)([a-z][a-z0-9-]*)"
)

# Match file paths under known resource directories (scripts/, references/, assets/, utils/, test(s)/)

# Matches a relative path that starts with one of the resource directory
# names and ends in ".<extension>".  The dot before the extension is escaped
# so it only matches a literal "."; unescaped it would accept any character,
# including a space, and swallow the following word.
FILE_REF_PATTERN = re.compile(
    r"(?im)(?:scripts|references|assets|utils|tests?)/[\w./-]+\.\w+"
)

def _scan_skill_md(skill_path: Path) -> dict:
    """Scan SKILL.md for bin deps, file refs, and skill deps.

    Args:
        skill_path: Directory expected to contain a ``SKILL.md`` file.

    Returns:
        A dict with the keys ``binDeps``, ``fileRefs`` and ``skillDeps``.
        Each value is a list of entry dicts (``name``, ``sourceRef``,
        ``status``, ``suggestion``) in order of first appearance, with
        repeated names reported once.  All three lists are empty when
        ``SKILL.md`` is missing or is not a regular file.
    """
    skill_md = skill_path / "SKILL.md"
    # is_file() rather than exists(): a directory named SKILL.md cannot be
    # read and is treated the same as a missing file.
    if not skill_md.is_file():
        return {"binDeps": [], "fileRefs": [], "skillDeps": []}

    content = skill_md.read_text(encoding="utf-8")

    def _source_ref(offset: int) -> str:
        """Return the 'SKILL.md (line ~N)' label for a character offset."""
        return f"SKILL.md (line ~{content.count(chr(10), 0, offset) + 1})"

    # CLI binary dependencies
    bin_found: set[str] = set()
    bin_deps: list[dict] = []
    for m in CLI_BIN_PATTERNS.finditer(content):
        raw = m.group(0).strip().lower()
        if raw in bin_found:
            continue
        bin_found.add(raw)
        bin_deps.append({
            "name": raw,
            "sourceRef": _source_ref(m.start()),
            "status": "ok",
            "suggestion": None,
        })

    # File references, checked for existence relative to the skill directory
    file_refs: list[dict] = []
    mentioned: set[str] = set()
    for m in FILE_REF_PATTERN.finditer(content):
        raw = m.group(0).strip().lstrip("./")
        if raw in mentioned:
            continue
        mentioned.add(raw)

        found = (skill_path / raw).exists()
        file_refs.append({
            "name": raw,
            "sourceRef": _source_ref(m.start()),
            "status": "ok" if found else "warn",
            "suggestion": None if found
            else f"Referenced file '{raw}' not found",
        })

    # Skill dependencies (referenced skill names in markdown).  These are
    # always "warn" because this function does not check any registry.
    skill_deps: list[dict] = []
    dep_names: set[str] = set()
    for m in SKILL_DEP_PATTERN.finditer(content):
        name = m.group(1).strip().lower()
        if name in dep_names:
            continue
        dep_names.add(name)
        skill_deps.append({
            "name": name,
            "sourceRef": _source_ref(m.start()),
            "status": "warn",
            "suggestion": f"Referenced skill '{name}' -- verify it's available in skill registry",
        })

    return {"binDeps": bin_deps, "fileRefs": file_refs, "skillDeps": skill_deps}

# ── Manifest builder ────────────────────────────────────────────────────

def build_manifest(skill_dir: Path) -> dict:
    """Build a complete dependency manifest for the given skill directory.

    Every ``*.py`` file below ``skill_dir`` is scanned for imports and
    ``SKILL.md`` is scanned for CLI binaries, file references and skill
    references.

    Args:
        skill_dir: Root directory of the skill.

    Returns:
        A dict with the keys ``skillDir``, ``imports``, ``binDeps``,
        ``fileRefs`` and ``skillDeps``; each list is sorted by entry name.

    Side effects:
        When ``skill_dir`` is not an existing directory, prints an error to
        stderr and terminates the process via ``sys.exit(1)``.
    """
    # is_dir() rather than exists(): a regular file is not a skill directory.
    if not skill_dir.is_dir():
        print(f"[ERROR] Skill directory not found: {skill_dir}", file=sys.stderr)
        sys.exit(1)

    imports: list[dict] = []
    for pyf in sorted(skill_dir.rglob("*.py")):
        imports.extend(_scan_py_imports(pyf))

    md_info = _scan_skill_md(skill_dir)

    # Merge duplicate imports: one entry per module name, keeping the most
    # severe status and recording every file that imports the module.
    severity = {"ok": 0, "warn": 1, "error": 2}
    by_name: dict[str, dict] = {}
    unnamed: list[dict] = []
    for entry in imports:
        name = entry["name"]
        if not name:
            # Parse failures carry an empty name.  They describe different
            # files, so each one is kept instead of being merged together.
            unnamed.append(dict(entry))
            continue

        existing = by_name.get(name)
        if existing is None:
            by_name[name] = dict(entry)
            continue

        if severity.get(entry["status"], 0) > severity.get(existing["status"], 0):
            existing["status"] = entry["status"]
            existing["suggestion"] = entry["suggestion"] or existing.get("suggestion")
        if entry["sourceRef"] not in existing["sourceRef"].split("; "):
            existing["sourceRef"] += "; " + entry["sourceRef"]

    merged_imports = sorted([*unnamed, *by_name.values()], key=lambda x: x["name"])

    return {
        "skillDir": str(skill_dir),
        "imports": merged_imports,
        "binDeps": sorted(md_info["binDeps"], key=lambda x: x["name"]),
        "fileRefs": sorted(md_info["fileRefs"], key=lambda x: x["name"]),
        "skillDeps": sorted(md_info["skillDeps"], key=lambda x: x["name"]),
    }

# ── Output helpers ──────────────────────────────────────────────────────

def print_tree(manifest: dict) -> None:
    """Pretty-print manifest as an indented tree on stdout.

    Args:
        manifest: A dict with the keys ``skillDir``, ``imports``,
            ``binDeps``, ``fileRefs`` and ``skillDeps``.  Each list holds
            entry dicts with ``name`` and ``status`` and, optionally,
            ``suggestion``.

    Each section prints a header with its entry count, then one line per
    entry prefixed by a status icon (``+`` ok, ``!`` warn, ``X`` error,
    ``?`` anything else), followed by the suggestion when there is one.
    """
    icons = {"ok": "+", "warn": "!", "error": "X"}
    sections = [
        ("Python Imports", "imports"),
        ("CLI Binaries", "binDeps"),
        ("File References", "fileRefs"),
        ("Skill Dependencies", "skillDeps"),
    ]

    print(f"Skill Directory: {manifest['skillDir']}")
    print()

    for title, key in sections:
        entries = manifest[key]
        print(f"  -- {title} ({len(entries)}) --")
        if not entries:
            print("     (none)")
        for entry in entries:
            icon = icons.get(entry["status"], "?")
            line = f"    {icon} {entry['name']}"
            if entry.get("suggestion"):
                line += f"\n       |-- {entry['suggestion']}"
            print(line)
        print()

# ── CLI entrypoint ──────────────────────────────────────────────────────

def main() -> None:
    """Parse the command line, build the manifest and emit it.

    Output mode:
        * ``--tree``: print the tree view to stdout and return; ``--stdout``
          has no additional effect in this mode.
        * ``--stdout``: print the manifest as JSON to stdout.
        * neither: write ``dependency-manifest.json`` into the skill
          directory and print a confirmation line.
    """
    parser = argparse.ArgumentParser(
        description="Generate a dependency manifest for a skill directory.",
    )
    parser.add_argument("skill_dir", help="Path to the skill directory")
    parser.add_argument("--tree", action="store_true",
                        help="Pretty-print tree instead of JSON")
    parser.add_argument("--stdout", action="store_true",
                        help="Write JSON to stdout instead of file")
    args = parser.parse_args()

    skill_dir = Path(args.skill_dir).resolve()
    manifest = build_manifest(skill_dir)

    if args.tree:
        print_tree(manifest)
        return

    json_str = json.dumps(manifest, indent=2, ensure_ascii=False)

    if args.stdout:
        print(json_str)
    else:
        out_path = skill_dir / "dependency-manifest.json"
        out_path.write_text(json_str, encoding="utf-8")
        print(f"[OK] Wrote dependency manifest to {out_path}")

# Run the CLI only when the file is executed as a script, not when imported.
if __name__ == "__main__":
    main()