# 📄 natural_language.py 15,062 bytes Apr 27, 2026 📋 Raw

import re
import sys
import json
from pathlib import Path
from typing import List, Dict, Any, Optional

# Handle both module and direct execution

# Resolve OntologyEngine whether this file runs as a script or is imported.
if __name__ == "__main__":
    # Direct execution: put this file's directory on sys.path so the sibling
    # ``engine`` module can be imported by its bare name.
    sys.path.insert(0, str(Path(__file__).parent))
    from engine import OntologyEngine
else:
    try:
        # Imported as part of a package: prefer the relative import.
        from .engine import OntologyEngine
    except ImportError:
        # Imported as a loose module (no parent package): fall back to the
        # bare module name.
        from engine import OntologyEngine

def _parse_properties(props) -> Dict[str, Any]:
"""Parse JSON properties from SQLite."""
if not props:
return {}
if isinstance(props, dict):
return props
try:
return json.loads(props)
except:
return {}

class OntologyNLP:
    """Natural language to ontology query translator.

    A question is lower-cased, stripped of trailing sentence punctuation and
    tested against an ordered list of regex patterns.  The first pattern that
    matches selects a query type, which is dispatched to one of the
    ``_query_*`` methods.  Every query returns a plain dict that carries at
    least ``question`` and ``query_type``.
    """

    # Short name -> entity ID.  Consulted only after both engine lookups in
    # _resolve_entity have failed.
    _ALIASES = {
        'matt': 'person:matt',
        'wadsworth': 'agent:wadsworth',
        'socrates': 'agent:socrates',
        'daedalus': 'agent:daedalus',
        'sully': 'person:sully',
        'harper': 'person:harper',
        'aundrea': 'person:aundrea',
        'icarus': 'project:icarus',
        'costco': 'project:costco_route',
    }

    # Plural word used in "show all ..." / "list all ..." -> entity type.
    _TYPE_MAP = {
        'people': 'person',
        'agents': 'agent',
        'projects': 'project',
        'events': 'event',
        'documents': 'document',
    }

    # Relation types that count as taking part in a project.
    _PARTICIPATION_RELATIONS = ('works_on', 'coordinates', 'manages')

    def __init__(self, engine: Optional["OntologyEngine"] = None):
        """Create a translator.

        Args:
            engine: Engine to query.  When ``None`` a default
                ``OntologyEngine()`` is constructed.
        """
        # ``is not None`` rather than ``or``: a caller-supplied engine must
        # not be discarded just because it happens to evaluate as falsy.
        self.engine = engine if engine is not None else OntologyEngine()

        # Query patterns: regex → query type.  Order matters: the first
        # pattern that matches wins.
        self.patterns = [
            # Projects queries
            (r'what projects (is|are|does|do|involve|include)\s+(\w+)', 'projects_involving'),
            (r'projects involving\s+(\w+)', 'projects_involving'),
            (r'what is\s+(\w+)\s+working on', 'projects_involving'),
            (r'who works on\s+(.+)', 'who_works_on'),
            (r'who is involved in\s+(.+)', 'who_works_on'),

            # Relations queries
            (r'relations of\s+(.+)', 'relations_of'),
            (r'what is connected to\s+(.+)', 'relations_of'),
            # NOTE: 'path_between' has no handler in _execute_query, so this
            # pattern currently yields the "Query type not implemented" result.
            (r'how is\s+(.+)\s+related to\s+(.+)', 'path_between'),

            # Family queries (check before entity queries)
            (r'who is in\s+(?:the\s+)?(matt|hoffmann).*family', 'family_members'),
            (r'family members of\s+(.+)', 'family_members'),

            # Entity queries
            (r'tell me about\s+(.+)', 'find_entity'),
            (r'show me\s+(.+)', 'find_entity'),
            (r'(who|what) is\s+(.+)', 'find_entity'),

            # Observation queries
            (r'what do we know about\s+(.+)', 'observations_of'),
            (r'recent info on\s+(.+)', 'observations_of'),

            # Type queries
            (r'show all\s+(.+)', 'find_by_type'),
            (r'list all\s+(.+)', 'find_by_type'),
        ]

    def ask(self, question: str) -> Dict[str, Any]:
        """Answer a natural language question about the ontology.

        Args:
            question: Free-form question text.

        Returns:
            The result dict of the first matching query, or a
            ``search_fallback`` result (name search, at most 5 hits) when no
            pattern matches.  ``question`` in the result is always the text
            exactly as the caller passed it.
        """
        # Drop trailing sentence punctuation so that "who works on icarus?"
        # captures "icarus" rather than "icarus?", which would never resolve.
        cleaned = question.strip().rstrip("?!.").rstrip()
        question_lower = cleaned.lower()

        # Try pattern matching
        for pattern, query_type in self.patterns:
            match = re.search(pattern, question_lower)
            if match:
                return self._execute_query(query_type, match.groups(), question)

        # Fallback: search by name
        return {
            "question": question,
            "query_type": "search_fallback",
            "results": self.engine.find_entities(name_contains=cleaned, limit=5),
            "note": "No specific pattern matched, searching by name"
        }

    def _execute_query(
        self,
        query_type: str,
        groups: tuple,
        original: str
    ) -> Dict[str, Any]:
        """Execute a typed query.

        Args:
            query_type: Query type selected by the matching pattern.
            groups: Capture groups of the regex match.
            original: The question as asked; echoed back in the result.

        Returns:
            The result dict of the matching ``_query_*`` method, or an error
            dict (``"Query type not implemented"``) for an unknown type.
        """
        first = groups[0] if groups else ""
        # Some patterns carry a leading verb/pronoun group, so the name of
        # interest is the last group for these two query types.
        last = groups[-1] if groups else ""

        if query_type == 'projects_involving':
            return self._query_projects_involving(last, original)
        if query_type == 'who_works_on':
            return self._query_who_works_on(first, original)
        if query_type == 'relations_of':
            return self._query_relations_of(first, original)
        if query_type == 'find_entity':
            return self._query_find_entity(last, original)
        if query_type == 'observations_of':
            return self._query_observations(first, original)
        if query_type == 'find_by_type':
            return self._query_by_type(first, original)
        if query_type == 'family_members':
            return self._query_family_members(original)

        return {
            "question": original,
            "query_type": query_type,
            "error": "Query type not implemented",
            "groups": groups
        }

    def _resolve_entity(self, name: str) -> Optional[str]:
        """Try to resolve a name to an entity ID.

        Lookup order: ``engine.get_entity(name)`` (the name used as an ID),
        then the first hit of ``engine.find_entities(name_contains=name)``,
        then the built-in alias table.

        Args:
            name: Name or ID fragment taken from the question.

        Returns:
            The entity ID, or ``None`` when nothing matches or ``name`` is
            empty.
        """
        name = (name or "").strip()
        if not name:
            # Never hand an empty string to the substring search.
            return None

        # Direct match by ID
        entity = self.engine.get_entity(name)
        if entity:
            return entity['id']

        # Search by name
        results = self.engine.find_entities(name_contains=name, limit=1)
        if results:
            return results[0]['id']

        # Common aliases
        return self._ALIASES.get(name.lower())

    def _query_projects_involving(self, name: str, original: str) -> Dict[str, Any]:
        """Find projects involving a person/agent.

        Follows relations fetched with ``direction="from"`` whose type is
        ``works_on``, ``coordinates`` or ``manages`` and keeps the targets
        whose ``type`` is ``project``.

        Args:
            name: Name of the person/agent as captured from the question.
            original: The question as asked.

        Returns:
            Result dict with ``results`` (project entities) and ``count``,
            or an error dict when ``name`` cannot be resolved.
        """
        entity_id = self._resolve_entity(name)

        if not entity_id:
            return {
                "question": original,
                "query_type": "projects_involving",
                "error": f"Could not resolve '{name}' to an entity",
                "results": []
            }

        # Get relations where this entity works_on projects
        relations = self.engine.get_relations(entity_id, direction="from")
        projects = []

        for r in relations:
            if r['relation_type'] not in self._PARTICIPATION_RELATIONS:
                continue
            project = self.engine.get_entity(r['to_id'])
            if project and project['type'] == 'project':
                projects.append(project)

        return {
            "question": original,
            "query_type": "projects_involving",
            "entity": name,
            "entity_id": entity_id,
            "results": projects,
            "count": len(projects)
        }

    def _query_who_works_on(self, project_name: str, original: str) -> Dict[str, Any]:
        """Find who works on a project.

        Follows relations fetched with ``direction="to"`` whose type is
        ``works_on``, ``coordinates`` or ``manages`` and keeps the sources
        whose ``type`` is ``person`` or ``agent``.

        Args:
            project_name: Project name as captured from the question.
            original: The question as asked.

        Returns:
            Result dict whose ``results`` are the person/agent entities, each
            extended with a ``role`` key taken from the relation's properties
            (default ``"contributor"``), or an error dict when the project
            cannot be resolved.
        """
        project_id = self._resolve_entity(project_name)

        if not project_id:
            return {
                "question": original,
                "query_type": "who_works_on",
                "error": f"Could not resolve project '{project_name}'",
                "results": []
            }

        # Get relations where people/agents work on this project
        relations = self.engine.get_relations(project_id, direction="to")
        people = []

        for r in relations:
            if r['relation_type'] not in self._PARTICIPATION_RELATIONS:
                continue
            person = self.engine.get_entity(r['from_id'])
            if person and person['type'] in ('person', 'agent'):
                props = _parse_properties(r.get('properties', {}))
                people.append({
                    **person,
                    "role": props.get('role', 'contributor')
                })

        return {
            "question": original,
            "query_type": "who_works_on",
            "project": project_name,
            "project_id": project_id,
            "results": people,
            "count": len(people)
        }

    def _query_relations_of(self, name: str, original: str) -> Dict[str, Any]:
        """Get all relations of an entity.

        Args:
            name: Entity name as captured from the question.
            original: The question as asked.

        Returns:
            Result dict with the ``entity``, its ``relations`` (fetched with
            ``direction="both"``) and ``count``, or an error dict when the
            name cannot be resolved.
        """
        entity_id = self._resolve_entity(name)

        if not entity_id:
            return {
                "question": original,
                "query_type": "relations_of",
                "error": f"Could not resolve '{name}'",
                "results": []
            }

        entity = self.engine.get_entity(entity_id)
        relations = self.engine.get_relations(entity_id, direction="both")

        return {
            "question": original,
            "query_type": "relations_of",
            "entity": entity,
            "relations": relations,
            "count": len(relations)
        }

    def _query_find_entity(self, name: str, original: str) -> Dict[str, Any]:
        """Find an entity by name.

        Args:
            name: Entity name as captured from the question.
            original: The question as asked.

        Returns:
            With ``found=True``: the entity, its relations (both directions)
            and up to 5 observations.  With ``found=False``: up to 5 name
            search hits under ``suggestions``.
        """
        entity_id = self._resolve_entity(name)

        if entity_id:
            entity = self.engine.get_entity(entity_id)
            relations = self.engine.get_relations(entity_id, direction="both")
            observations = self.engine.get_observations(entity_id, limit=5)

            return {
                "question": original,
                "query_type": "find_entity",
                "found": True,
                "entity": entity,
                "relations": relations,
                "observations": observations
            }

        # Fallback: search
        results = self.engine.find_entities(name_contains=name, limit=5)
        return {
            "question": original,
            "query_type": "find_entity",
            "found": False,
            "suggestions": results
        }

    def _query_observations(self, name: str, original: str) -> Dict[str, Any]:
        """Get observations about an entity.

        Args:
            name: Entity name as captured from the question.
            original: The question as asked.

        Returns:
            Result dict with the ``entity``, up to 20 ``observations`` and
            ``count``, or an error dict when the name cannot be resolved.
        """
        entity_id = self._resolve_entity(name)

        if not entity_id:
            return {
                "question": original,
                "query_type": "observations_of",
                "error": f"Could not resolve '{name}'",
                "results": []
            }

        entity = self.engine.get_entity(entity_id)
        observations = self.engine.get_observations(entity_id, limit=20)

        return {
            "question": original,
            "query_type": "observations_of",
            "entity": entity,
            "observations": observations,
            "count": len(observations)
        }

    def _query_by_type(self, type_name: str, original: str) -> Dict[str, Any]:
        """Find entities by type.

        Args:
            type_name: Type word from the question.  Known plurals
                (``people``, ``agents``, ``projects``, ``events``,
                ``documents``) are mapped to their type; anything else is
                lower-cased and used as given.
            original: The question as asked.

        Returns:
            Result dict with the normalized ``type``, up to 50 ``results``
            and ``count``.
        """
        lowered = type_name.lower()
        normalized_type = self._TYPE_MAP.get(lowered, lowered)
        results = self.engine.find_entities(type=normalized_type, limit=50)

        return {
            "question": original,
            "query_type": "find_by_type",
            "type": normalized_type,
            "results": results,
            "count": len(results)
        }

    def _query_family_members(self, original: str) -> Dict[str, Any]:
        """Get family members.

        Scans up to 50 ``person`` entities and keeps those whose ``family``
        property contains "hoffmann" (case-insensitive).  The family is
        hard-coded; the name captured by the pattern is not used.

        Args:
            original: The question as asked.

        Returns:
            Result dict with ``family`` ("Hoffmann"), ``results`` and
            ``count``.
        """
        # Find all entities where family=hoffmann
        results = self.engine.find_entities(type='person', limit=50)
        family = []
        for person in results:
            props = _parse_properties(person.get('properties', {}))
            # The stored value may be missing, null or not a string.
            family_name = str(props.get('family') or '')
            if 'hoffmann' in family_name.lower():
                family.append(person)

        return {
            "question": original,
            "query_type": "family_members",
            "family": "Hoffmann",
            "results": family,
            "count": len(family)
        }

# Convenience function

def ask_ontology(question: str) -> str:
    """Ask the ontology a question, get a human-readable answer.

    Builds a fresh ``OntologyNLP`` with its default engine on every call.

    Args:
        question: Free-form question text.

    Returns:
        The query result rendered by ``format_result``.
    """
    nlp = OntologyNLP()
    result = nlp.ask(question)

    return format_result(result)

def format_result(result: Dict[str, Any]) -> str:
    """Format query result as human-readable text.

    Args:
        result: A result dict carrying ``query_type`` plus the keys that
            query type produces.

    Returns:
        The error message when ``result`` has a truthy ``error``; a text
        rendering for ``projects_involving``, ``who_works_on``,
        ``relations_of``, ``find_entity``, ``family_members`` and
        ``find_by_type``; otherwise the dict dumped as indented JSON.
    """
    # Symbols are written as escapes so this source stays pure ASCII and the
    # output glyphs cannot be corrupted by a mis-decoded copy of the file.
    query_type = result.get('query_type', 'unknown')

    if result.get('error'):
        return f"\u2753 {result['error']}"  # question-mark ornament

    if query_type == 'projects_involving':
        projects = result.get('results', [])
        if not projects:
            return f"No projects found involving {result.get('entity', 'that entity')}."
        lines = [f"\U0001F4CB Projects involving {result['entity']}:"]  # clipboard
        for p in projects:
            props = _parse_properties(p.get('properties', {}))
            status = props.get('status', 'unknown')
            lines.append(f"  \u2022 {p['name']} ({status})")
        return "\n".join(lines)

    if query_type == 'who_works_on':
        people = result.get('results', [])
        if not people:
            return f"No one found working on {result.get('project', 'that project')}."
        lines = [f"\U0001F465 People working on {result['project']}:"]  # two silhouettes
        for p in people:
            role = p.get('role', 'contributor')
            lines.append(f"  \u2022 {p['name']} ({role})")
        return "\n".join(lines)

    if query_type == 'relations_of':
        entity = result.get('entity', {})
        relations = result.get('relations', [])
        if not entity:
            return "Entity not found."
        lines = [f"\U0001F517 Relations of {entity.get('name', 'Unknown')}:"]  # link
        for r in relations:
            # A relation without a 'direction' key is rendered as incoming
            # instead of raising KeyError.
            outgoing = r.get('direction') == 'outgoing'
            arrow = "\u2192" if outgoing else "\u2190"
            # Fall back to the peer's ID when its display name is absent.
            if outgoing:
                other = r.get('to_name') or r.get('to_id')
            else:
                other = r.get('from_name') or r.get('from_id')
            lines.append(f"  {arrow} {other} ({r['relation_type']})")
        return "\n".join(lines)

    if query_type == 'find_entity':
        if result.get('found'):
            entity = result['entity']
            props = _parse_properties(entity.get('properties', {}))
            lines = [f"\U0001F4CC {entity['name']}", f"Type: {entity['type']}"]  # pushpin
            lines.extend(f"{k}: {v}" for k, v in props.items())
            return "\n".join(lines)

        suggestions = result.get('suggestions', [])
        if suggestions:
            return f"Not found. Did you mean: {', '.join(s['name'] for s in suggestions)}?"
        return "No matching entities found."

    if query_type == 'family_members':
        family = result.get('results', [])
        if not family:
            return "No family members found."
        # Family emoji: man, woman, girl, boy joined by zero-width joiners.
        lines = [
            "\U0001F468\u200D\U0001F469\u200D\U0001F467\u200D\U0001F466"
            f" {result['family']} Family:"
        ]
        for member in family:
            lines.append(f"  \u2022 {member['name']}")
        return "\n".join(lines)

    if query_type == 'find_by_type':
        items = result.get('results', [])
        if not items:
            return f"No {result.get('type', 'items')} found."
        lines = [f"\U0001F4C2 All {result.get('type', 'items')} ({len(items)}):"]  # open folder
        for item in items:
            lines.append(f"  \u2022 {item['name']}")
        return "\n".join(lines)

    # Default: return JSON
    return json.dumps(result, indent=2, default=str)

import json

# Command-line entry point: join all arguments into one question and print
# the formatted answer.
if __name__ == "__main__":
    import sys

    if len(sys.argv) < 2:
        print("Usage: python -m ontology.natural_language 'what projects is socrates working on?'")
        sys.exit(1)

    question = " ".join(sys.argv[1:])
    print(ask_ontology(question))