#!/usr/bin/env python3
"""
Skill Dependency Manifest Generator
Scans a skill directory and generates a dependency manifest JSON (dependency-manifest.json)
detecting Python imports, file references, CLI tool dependencies, and skill dependencies.
Usage:
    dep_manifest.py <skill-dir>
    dep_manifest.py <skill-dir> --tree
    dep_manifest.py <skill-dir> --stdout
"""
import argparse
import json
import re
import sys
from pathlib import Path
from typing import Any
# ── Constants ──────────────────────────────────────────────────────────────
# Shared constants for output schema (matches SPEC.md contract)

# Manifest categories, in output/display order.
CATEGORIES = ["imports", "fileRefs", "binDeps", "skillDeps"]

# Top-level module names in the CPython standard library (union across 3.x
# versions, so imports in older scripts still classify as stdlib).
PYTHON_STDLIB = {
    "abc", "aifc", "argparse", "array", "ast", "asynchat", "asyncio",
    "asyncore", "atexit", "audioop", "base64", "bdb", "binascii", "binhex",
    "bisect", "builtins", "bz2", "calendar", "cgi", "cgitb", "chunk",
    "cmath", "cmd", "code", "codecs", "codeop", "collections", "colorsys",
    "compileall", "concurrent", "configparser", "contextlib", "contextvars",
    "copy", "copyreg", "cProfile", "crypt", "csv", "ctypes", "curses",
    "dataclasses", "datetime", "dbm", "decimal", "difflib", "dis",
    "distutils", "doctest", "email", "encodings", "enum", "errno",
    "faulthandler", "fcntl", "filecmp", "fileinput", "fnmatch", "fractions",
    "ftplib", "functools", "gc", "getopt", "getpass", "gettext", "glob",
    "graphlib", "grp", "gzip", "hashlib", "heapq", "hmac", "html", "http",
    "idlelib", "imaplib", "imghdr", "imp", "importlib", "inspect", "io",
    "ipaddress", "itertools", "json", "keyword", "lib2to3", "linecache",
    "locale", "logging", "lzma", "mailbox", "mailcap", "marshal", "math",
    "mimetypes", "mmap", "modulefinder", "multiprocessing", "netrc", "nis",
    "nntplib", "numbers", "operator", "optparse", "os", "ossaudiodev",
    "pathlib", "pdb", "pickle", "pickletools", "pipes", "pkgutil",
    "platform", "plistlib", "poplib", "posix", "posixpath", "pprint",
    "profile", "pstats", "pty", "pwd", "py_compile", "pyclbr",
    "pydoc", "queue", "quopri", "random", "re", "readline", "reprlib",
    "resource", "rlcompleter", "runpy", "sched", "secrets", "select",
    "selectors", "shelve", "shlex", "shutil", "signal", "site", "smtpd",
    "smtplib", "sndhdr", "socket", "socketserver", "sqlite3", "ssl",
    "stat", "statistics", "string", "stringprep", "struct", "subprocess",
    "sunau", "symtable", "sys", "sysconfig", "syslog", "tabnanny",
    "tarfile", "telnetlib", "tempfile", "termios", "test", "textwrap",
    "threading", "time", "timeit", "tkinter", "token", "tokenize",
    "tomllib", "trace", "traceback", "tracemalloc", "tty", "turtle",
    "turtledemo", "types", "typing", "unicodedata", "unittest", "urllib",
    "uu", "uuid", "venv", "warnings", "wave", "weakref", "webbrowser",
    "winreg", "winsound", "wsgiref", "xdrlib", "xml", "xmlrpc",
    "zipapp", "zipfile", "zipimport", "zlib",
}


# ── Detectors ──────────────────────────────────────────────────────────────
def detect_python_imports(file_path: Path) -> list[dict[str, Any]]:
    """Scan a .py file for import statements.

    Returns one manifest entry per import line with keys:
    name, line, file, type ("stdlib"/"third-party"), status, suggestion.
    Unreadable files yield an empty list.
    """
    results: list[dict[str, Any]] = []
    try:
        content = file_path.read_text(encoding="utf-8", errors="replace")
    except OSError:
        return results
    # Match: import foo, import foo.bar, from foo import bar
    patterns = [
        re.compile(r"^\s*import\s+([a-zA-Z_][a-zA-Z0-9_.]*)"),
        re.compile(r"^\s*from\s+([a-zA-Z_][a-zA-Z0-9_.]*)\s+import"),
    ]
    for line_num, line in enumerate(content.splitlines(), 1):
        for pattern in patterns:
            m = pattern.match(line)
            if m:
                mod_name = m.group(1)
                # Classify by the top-level package name.
                # split(".")[0] handles both dotted and plain names, so the
                # previous `if "." in mod_name` test was redundant.
                top_mod = mod_name.split(".")[0]
                in_stdlib = top_mod in PYTHON_STDLIB
                # Report the file path relative to the skill root (the
                # grandparent of e.g. skill/scripts/foo.py).  NOTE: a Path is
                # always truthy, so the original `if file_path.parent.parent
                # else file_path.name` fallback was dead code.
                rel_file = str(file_path.relative_to(file_path.parent.parent))
                results.append({
                    "name": mod_name,
                    "line": line_num,
                    "file": rel_file,
                    "type": "stdlib" if in_stdlib else "third-party",
                    "status": "ok" if in_stdlib else "warn",
                    "suggestion": "" if in_stdlib else f"Third-party module '{top_mod}' must be installed",
                })
    return results
def detect_imports_from_code_blocks(content: str, source_file: str) -> list[dict[str, Any]]:
    """Scan fenced code blocks in markdown *content* for Python imports.

    Returns the same entry shape as detect_python_imports(); the "line" value
    is a "code-block:N" string (N = line offset within the block).
    """
    results: list[dict[str, Any]] = []
    # BUG FIX: these patterns had lost their `*` quantifiers (`^\simport`,
    # `[a-zA-Z0-9_.]`), so they demanded exactly one leading whitespace char
    # and captured at most two characters of the module name.  Restored to
    # match the patterns used in detect_python_imports().
    patterns = [
        re.compile(r"^\s*import\s+([a-zA-Z_][a-zA-Z0-9_.]*)"),
        re.compile(r"^\s*from\s+([a-zA-Z_][a-zA-Z0-9_.]*)\s+import"),
    ]
    # Find fenced code blocks
    block_pattern = re.compile(r"```(?:python|py|bash|sh|shell)?\n(.*?)```", re.DOTALL)
    for block_match in block_pattern.finditer(content):
        block_text = block_match.group(1)
        # Only scan python blocks for imports
        # For simplicity, scan all blocks but only report python-related ones
        for line_num_offset, line in enumerate(block_text.splitlines(), 1):
            for pattern in patterns:
                m = pattern.match(line)
                if m:
                    mod_name = m.group(1)
                    top_mod = mod_name.split(".")[0] if "." in mod_name else mod_name
                    in_stdlib = top_mod in PYTHON_STDLIB
                    results.append({
                        "name": mod_name,
                        "line": f"code-block:{line_num_offset}",
                        "file": source_file,
                        "type": "stdlib" if in_stdlib else "third-party",
                        "status": "ok" if in_stdlib else "warn",
                        "suggestion": "" if in_stdlib else f"Third-party module '{top_mod}' must be installed",
                    })
    return results
def detect_file_references(skill_md_content: str, source_file: str) -> list[dict[str, Any]]:
    """Scan SKILL.md for file/directory references (paths to scripts/, references/, assets/).

    Entries are deduplicated per (path, line); "status" is provisional and is
    re-evaluated by check_file_refs_exist().
    """
    results: list[dict[str, Any]] = []
    seen: set[tuple[str, int]] = set()
    # Patterns reconstructed — the original escapes/backticks had been
    # mangled (e.g. `[.*?](...)` instead of `\[.*?\]\(...\)`).
    path_re = r"(?:scripts/|references/|assets/)\S+"
    patterns = [
        # Markdown links like [text](scripts/foo.py)
        re.compile(rf"\[.*?\]\(({path_re})\)"),
        # Backtick paths like `scripts/foo.py`
        re.compile(rf"`({path_re})`"),
        # Bare inline mentions like (see scripts/foo.py)
        re.compile(rf"\b({path_re})"),
    ]
    for line_num, line in enumerate(skill_md_content.splitlines(), 1):
        for pattern in patterns:
            for m in pattern.finditer(line):
                # Trim trailing punctuation/backticks picked up by \S+.
                ref_path = m.group(1).rstrip(".)`")
                key = (ref_path, line_num)
                if key in seen:
                    # Overlapping patterns find the same path; report once.
                    continue
                seen.add(key)
                results.append({
                    "name": ref_path,
                    "line": line_num,
                    "file": source_file,
                    "status": "ok",  # Will be re-evaluated after full scan
                    "suggestion": "",
                })
    return results
# CLI tools commonly referenced by skills; matched as standalone words.
KNOWN_CLI_TOOLS = [
    "gh", "jq", "ffmpeg", "ffprobe", "git", "python3", "python", "node",
    "npm", "npx", "docker", "docker-compose", "curl", "wget", "rsync",
    "trash", "sips", "convert", "magick", "identify", "pdftotext",
    "pdftoppm", "pdfunite", "qpdf", "exiftool", "sox", "youtube-dl",
    "yt-dlp", "grep", "sed", "awk", "sort", "uniq", "tee", "xargs",
    "find", "rg", "fzf", "bat", "delta", "tmux", "screen", "ssh",
    "scp", "tailscale", "systemctl", "journalctl", "cargo", "go",
    "ruby", "perl", "make", "cmake", "pip3", "pip", "poetry", "uv",
    "brew", "apt", "apt-get", "yum", "dnf", "pacman", "nix-env",
    "helm", "kubectl", "terraform", "ansible", "pulumi", "vault",
    "aws", "gcloud", "az", "psql", "mysql", "sqlite3", "redis-cli",
]

# Precompiled word-boundary patterns, built once at import time instead of
# being recompiled for every (line, tool) pair during a scan.  The lookarounds
# exclude letters, digits, '.', '/', '-' so "jq" doesn't match "jquery",
# "./jq", or "jq.exe".
_CLI_TOOL_PATTERNS = {
    tool: re.compile(rf"(?<![a-zA-Z0-9./-]){re.escape(tool)}(?![a-zA-Z0-9./-])")
    for tool in KNOWN_CLI_TOOLS
}


def detect_bin_deps(skill_md_content: str, source_file: str) -> list[dict[str, Any]]:
    """Scan SKILL.md for mentions of CLI tools.

    Each tool is reported once, at the line of its first mention.
    """
    results: list[dict[str, Any]] = []
    seen: set[str] = set()
    for line_num, line in enumerate(skill_md_content.splitlines(), 1):
        for tool in KNOWN_CLI_TOOLS:
            if tool in seen:
                continue
            if _CLI_TOOL_PATTERNS[tool].search(line):
                seen.add(tool)
                results.append({
                    "name": tool,
                    "line": line_num,
                    "file": source_file,
                    "status": "warn",
                    "suggestion": f"CLI tool '{tool}' must be installed on the system",
                })
    return results
def detect_skill_deps(skill_md_content: str, source_file: str) -> list[dict[str, Any]]:
    """Scan SKILL.md body text for references to other skills.

    Matches phrases like "see the foo-bar skill" and bare "foo-bar skill"
    mentions; entries are deduplicated by skill name (first mention wins).
    """
    results: list[dict[str, Any]] = []
    seen: set[str] = set()
    # Look for skill references in the body (not the skill's own frontmatter).
    # BUG FIX: the article was written `the?` (literal "th" plus optional
    # "e"), so phrases without an article never matched the first pattern;
    # `(?:the\s+)?` makes the article genuinely optional.
    # NOTE: the broad second pattern can still capture stopwords
    # (e.g. "the skill" -> "the"); entries are only warnings, so that noise
    # is tolerated rather than filtered.
    skill_ref_patterns = [
        re.compile(r"(?:see|using|via|with)\s+(?:the\s+)?([a-z][a-z0-9-]+)\s+skill", re.IGNORECASE),
        re.compile(r"([a-z][a-z0-9-]+)\s+skill", re.IGNORECASE),
    ]
    for line_num, line in enumerate(skill_md_content.splitlines(), 1):
        for pattern in skill_ref_patterns:
            for m in pattern.finditer(line):
                skill_name = m.group(1).lower().strip()
                if skill_name in seen:
                    # Overlapping patterns (or repeat mentions) — report once.
                    continue
                seen.add(skill_name)
                results.append({
                    "name": skill_name,
                    "line": line_num,
                    "file": source_file,
                    "status": "warn",
                    "suggestion": f"Referenced skill '{skill_name}' must be installed",
                })
    return results
# ── Path validation ────────────────────────────────────────────────────────
def validate_existing_paths(results: list[dict[str, Any]], skill_dir: Path) -> None:
    """Deprecated no-op kept for backward compatibility.

    Path-existence validation is done by check_file_refs_exist(), which the
    caller applies to the fileRefs category directly.  The previous body
    looped over *results* but never modified anything, so the loop has been
    removed.
    """
    # Intentionally does nothing; see check_file_refs_exist().
def check_file_refs_exist(entries: list[dict[str, Any]], skill_dir: Path) -> None:
    """Mark each fileRef entry "ok" or "error" by path existence under *skill_dir*.

    Mutates *entries* in place.  A dangling symlink still counts as present
    because ``is_symlink()`` is true even when its target is missing.
    """
    for ref in entries:
        target = skill_dir / ref["name"]
        present = target.exists() or target.is_symlink()
        if present:
            ref["status"], ref["suggestion"] = "ok", ""
        else:
            ref["status"] = "error"
            ref["suggestion"] = (
                f"Referenced path '{ref['name']}' does not exist in skill directory"
            )
# ── Main scan ──────────────────────────────────────────────────────────────
def scan_skill(skill_dir: Path) -> dict[str, Any]:
"""Scan a skill directory and return the dependency manifest."""
imports: list[dict[str, Any]] = []
file_refs: list[dict[str, Any]] = []
bin_deps: list[dict[str, Any]] = []
skill_deps: list[dict[str, Any]] = []
# Scan all .py files in scripts/ and subdirectories
for py_file in skill_dir.rglob("*.py"):
imports.extend(detect_python_imports(py_file))
# Scan SKILL.md
skill_md = skill_dir / "SKILL.md"
if skill_md.exists():
try:
content = skill_md.read_text(encoding="utf-8", errors="replace")
except OSError:
content = ""
else:
content = ""
if content:
# Imports from code blocks in SKILL.md
imports_from_md = detect_imports_from_code_blocks(content, "SKILL.md")
imports.extend(imports_from_md)
# File references in SKILL.md
file_refs = detect_file_references(content, "SKILL.md")
# CLI tool deps in SKILL.md
bin_deps = detect_bin_deps(content, "SKILL.md")
# Skill deps in SKILL.md
skill_deps = detect_skill_deps(content, "SKILL.md")
# Validate file references
if file_refs:
check_file_refs_exist(file_refs, skill_dir)
# Deduplicate imports by (name, file) pairs
seen_imports: set[tuple[str, str]] = set()
deduped_imports = []
for entry in imports:
key = (entry["name"], entry["file"])
if key not in seen_imports:
seen_imports.add(key)
deduped_imports.append(entry)
imports = deduped_imports
# Deduplicate bin deps by name
seen_bins: set[str] = set()
deduped_bins = []
for entry in bin_deps:
if entry["name"] not in seen_bins:
seen_bins.add(entry["name"])
deduped_bins.append(entry)
bin_deps = deduped_bins
# Deduplicate skill deps by name
seen_skills: set[str] = set()
deduped_skills = []
for entry in skill_deps:
if entry["name"] not in seen_skills:
seen_skills.add(entry["name"])
deduped_skills.append(entry)
skill_deps = deduped_skills
return {
"manifestVersion": "1.0",
"skillDir": str(skill_dir.resolve()),
"imports": imports,
"fileRefs": file_refs,
"binDeps": bin_deps,
"skillDeps": skill_deps,
}
# ── Pretty print ───────────────────────────────────────────────────────────
def print_tree(manifest: dict[str, Any]) -> None:
    """Pretty-print the manifest as a CLI tree to stdout."""
    # BUG FIX: the separators were written as {'='60} (missing the `*`),
    # which is a syntax error inside an f-string.
    print(f"\n{'=' * 60}")
    print(f" Dependency Manifest: {manifest['skillDir']}")
    print(f" Version: {manifest['manifestVersion']}")
    print(f"{'=' * 60}")
    # Label map also fixes category order (same order as CATEGORIES);
    # hoisted out of the loop so it is built once.
    labels = {
        "imports": "Python Imports",
        "fileRefs": "File References",
        "binDeps": "CLI Tools",
        "skillDeps": "Skill Dependencies",
    }
    icons = {"ok": "✓", "warn": "⚠", "error": "✗"}
    for category, label in labels.items():
        entries = manifest.get(category, [])
        if not entries:
            continue
        print(f"\n 📦 {label} ({len(entries)})")
        print(f" {'─' * 58}")
        for entry in entries:
            status_icon = icons.get(entry.get("status", ""), "?")
            name = entry["name"]
            line = entry.get("line", "?")
            suggestion = entry.get("suggestion", "")
            print(f" {status_icon} {name} (line {line})")
            if suggestion:
                print(f" → {suggestion}")
# ── Entry point ────────────────────────────────────────────────────────────
def main() -> None:
    """CLI entry point: parse args, scan the skill directory, emit the manifest.

    Exits with status 1 when the given path is not a directory.
    """
    parser = argparse.ArgumentParser(
        description="Generate dependency manifest for a skill directory",
    )
    parser.add_argument("skill_dir", help="Path to skill directory")
    parser.add_argument(
        "--tree",
        action="store_true",
        help="Print dependency tree to stdout instead of writing manifest.json",
    )
    parser.add_argument(
        "--stdout",
        action="store_true",
        help="Print JSON to stdout instead of writing manifest.json",
    )
    args = parser.parse_args()

    skill_dir = Path(args.skill_dir).resolve()
    if not skill_dir.is_dir():
        print(f"[ERROR] Not a directory: {skill_dir}", file=sys.stderr)
        sys.exit(1)

    manifest = scan_skill(skill_dir)
    if args.tree:
        print_tree(manifest)
    elif args.stdout:
        print(json.dumps(manifest, indent=2))
    else:
        output_path = skill_dir / "dependency-manifest.json"
        # Explicit encoding so output is stable across platforms.
        output_path.write_text(json.dumps(manifest, indent=2), encoding="utf-8")
        print(f"[OK] Wrote dependency manifest to {output_path}")
        # Summary only in file-writing mode, so --stdout emits pure JSON.
        total = sum(len(manifest.get(c, [])) for c in CATEGORIES)
        print(f" Found {total} total dependency entries:")
        for cat in CATEGORIES:
            print(f" {cat}: {len(manifest.get(cat, []))}")


# BUG FIX: markdown mangling had stripped the dunder underscores
# (`if name == "main":`), so the guard raised NameError and the
# script never ran.
if __name__ == "__main__":
    main()