import re
import sys
import json
from pathlib import Path
from typing import List, Dict, Any, Optional
# Handle both module and direct execution
if __name__ == "__main__":
    # Executed directly as a script: there is no parent package, so put this
    # file's directory on sys.path and import the sibling module by bare name.
    sys.path.insert(0, str(Path(__file__).parent))
    from engine import OntologyEngine
else:
    # Imported as a module: prefer the package-relative import and fall back
    # to a bare import when the file is loaded outside of a package.
    try:
        from .engine import OntologyEngine
    except ImportError:
        from engine import OntologyEngine
def _parse_properties(props) -> Dict[str, Any]:
"""Parse JSON properties from SQLite."""
if not props:
return {}
if isinstance(props, dict):
return props
try:
return json.loads(props)
except:
return {}
class OntologyNLP:
    """Natural language to ontology query translator.

    A question is lower-cased and tested against ``self.patterns`` in order;
    the first regex that matches selects a query type, and the captured text
    is handed to the matching ``_query_*`` method. Every query method returns
    a plain dict that always carries ``question`` and ``query_type``.

    The engine is only used through four calls: ``get_entity(id)``,
    ``find_entities(name_contains=..., type=..., limit=...)``,
    ``get_relations(id, direction=...)`` and ``get_observations(id, limit=...)``.
    """

    # Last-resort name -> entity ID aliases, consulted only after both engine
    # lookups in ``_resolve_entity`` came back empty.
    _ALIASES = {
        'matt': 'person:matt',
        'wadsworth': 'agent:wadsworth',
        'socrates': 'agent:socrates',
        'daedalus': 'agent:daedalus',
        'sully': 'person:sully',
        'harper': 'person:harper',
        'aundrea': 'person:aundrea',
        'icarus': 'project:icarus',
        'costco': 'project:costco_route',
    }

    # Plural words accepted by "show all ..." / "list all ..." questions.
    _TYPE_MAP = {
        'people': 'person',
        'agents': 'agent',
        'projects': 'project',
        'events': 'event',
        'documents': 'document',
    }

    # Relation types that link a person/agent to a project.
    _WORK_RELATIONS = ('works_on', 'coordinates', 'manages')

    # Sentence punctuation removed from the end of a captured name.
    _TRAILING_PUNCTUATION = '?!.,;:'

    def __init__(self, engine: Optional["OntologyEngine"] = None):
        """Create a translator bound to *engine*.

        Args:
            engine: Engine to query. When ``None`` a new ``OntologyEngine()``
                is created.
        """
        self.engine = engine if engine is not None else OntologyEngine()
        # Query patterns: regex -> query type. Order matters, the first match wins.
        self.patterns = [
            # Projects queries
            (r'what projects (is|are|does|do|involve|include)\s+(\w+)', 'projects_involving'),
            (r'projects involving\s+(\w+)', 'projects_involving'),
            (r'what is\s+(\w+)\s+working on', 'projects_involving'),
            (r'who works on\s+(.+)', 'who_works_on'),
            (r'who is involved in\s+(.+)', 'who_works_on'),
            # Relations queries
            (r'relations of\s+(.+)', 'relations_of'),
            (r'what is connected to\s+(.+)', 'relations_of'),
            # NOTE(review): 'path_between' has no handler in _execute_query,
            # so these questions currently return "Query type not implemented".
            (r'how is\s+(.+)\s+related to\s+(.+)', 'path_between'),
            # Family queries (check before entity queries)
            (r'who is in\s+(?:the\s+)?(matt|hoffmann).*family', 'family_members'),
            (r'family members of\s+(.+)', 'family_members'),
            # Entity queries
            (r'tell me about\s+(.+)', 'find_entity'),
            (r'show me\s+(.+)', 'find_entity'),
            (r'(who|what) is\s+(.+)', 'find_entity'),
            # Observation queries
            (r'what do we know about\s+(.+)', 'observations_of'),
            (r'recent info on\s+(.+)', 'observations_of'),
            # Type queries
            (r'show all\s+(.+)', 'find_by_type'),
            (r'list all\s+(.+)', 'find_by_type'),
        ]

    def ask(self, question: str) -> Dict[str, Any]:
        """Answer a natural language question about the ontology.

        Args:
            question: Free-form question text.

        Returns:
            The result dict of the first matching query type, or a
            ``search_fallback`` result (name search on the raw question,
            at most 5 hits) when no pattern matches.
        """
        question_lower = question.lower().strip()
        for pattern, query_type in self.patterns:
            match = re.search(pattern, question_lower)
            if match:
                return self._execute_query(query_type, match.groups(), question)
        return {
            "question": question,
            "query_type": "search_fallback",
            "results": self.engine.find_entities(name_contains=question, limit=5),
            "note": "No specific pattern matched, searching by name"
        }

    @classmethod
    def _clean_term(cls, text: str) -> str:
        """Trim whitespace and trailing sentence punctuation from a captured name.

        The greedy ``(.+)`` captures swallow a closing "?" or ".", which would
        otherwise become part of the name being looked up.
        """
        return text.strip().rstrip(cls._TRAILING_PUNCTUATION).strip()

    def _execute_query(
        self,
        query_type: str,
        groups: tuple,
        original: str
    ) -> Dict[str, Any]:
        """Dispatch a matched pattern to its query method.

        Args:
            query_type: Type label taken from ``self.patterns``.
            groups: Regex capture groups of the match.
            original: The question as the user typed it (echoed in results).

        Returns:
            The query method's result dict, or an error dict with the key
            ``error`` set to "Query type not implemented" for unknown types.
        """
        if query_type == 'family_members':
            return self._query_family_members(original)

        # handler plus the index of the capture group that holds the name
        # (-1 where a leading group captures a helper word such as "is"/"who").
        handlers = {
            'projects_involving': (self._query_projects_involving, -1),
            'who_works_on': (self._query_who_works_on, 0),
            'relations_of': (self._query_relations_of, 0),
            'find_entity': (self._query_find_entity, -1),
            'observations_of': (self._query_observations, 0),
            'find_by_type': (self._query_by_type, 0),
        }
        entry = handlers.get(query_type)
        if entry is None:
            return {
                "question": original,
                "query_type": query_type,
                "error": "Query type not implemented",
                "groups": groups
            }
        handler, index = entry
        term = self._clean_term(groups[index]) if groups else ""
        return handler(term, original)

    def _resolve_entity(self, name: str) -> Optional[str]:
        """Try to resolve a name to an entity ID.

        Lookup order: exact ID via ``get_entity``, then the first hit of a
        ``name_contains`` search, then the built-in alias table.

        Returns:
            The entity ID, or ``None`` when nothing matches or *name* is empty.
        """
        if not name:
            # An empty substring search could match an arbitrary entity.
            return None
        entity = self.engine.get_entity(name)
        if entity:
            return entity['id']
        results = self.engine.find_entities(name_contains=name, limit=1)
        if results:
            return results[0]['id']
        return self._ALIASES.get(name.lower())

    def _query_projects_involving(self, name: str, original: str) -> Dict[str, Any]:
        """Find projects involving a person/agent.

        Follows the entity's outgoing relations of a work-type
        (``works_on`` / ``coordinates`` / ``manages``) and keeps targets whose
        ``type`` is ``project``.
        """
        entity_id = self._resolve_entity(name)
        if not entity_id:
            return {
                "question": original,
                "query_type": "projects_involving",
                "error": f"Could not resolve '{name}' to an entity",
                "results": []
            }
        projects = []
        for relation in self.engine.get_relations(entity_id, direction="from"):
            if relation['relation_type'] not in self._WORK_RELATIONS:
                continue
            project = self.engine.get_entity(relation['to_id'])
            if project and project['type'] == 'project':
                projects.append(project)
        return {
            "question": original,
            "query_type": "projects_involving",
            "entity": name,
            "entity_id": entity_id,
            "results": projects,
            "count": len(projects)
        }

    def _query_who_works_on(self, project_name: str, original: str) -> Dict[str, Any]:
        """Find who works on a project.

        Follows the project's incoming work-type relations and keeps sources
        whose ``type`` is ``person`` or ``agent``. Each result is the entity
        dict plus a ``role`` key taken from the relation's properties
        (default ``contributor``).
        """
        project_id = self._resolve_entity(project_name)
        if not project_id:
            return {
                "question": original,
                "query_type": "who_works_on",
                "error": f"Could not resolve project '{project_name}'",
                "results": []
            }
        people = []
        for relation in self.engine.get_relations(project_id, direction="to"):
            if relation['relation_type'] not in self._WORK_RELATIONS:
                continue
            person = self.engine.get_entity(relation['from_id'])
            if person and person['type'] in ('person', 'agent'):
                props = _parse_properties(relation.get('properties', {}))
                people.append({
                    **person,
                    "role": props.get('role', 'contributor')
                })
        return {
            "question": original,
            "query_type": "who_works_on",
            "project": project_name,
            "project_id": project_id,
            "results": people,
            "count": len(people)
        }

    def _query_relations_of(self, name: str, original: str) -> Dict[str, Any]:
        """Get all relations (both directions) of an entity."""
        entity_id = self._resolve_entity(name)
        if not entity_id:
            return {
                "question": original,
                "query_type": "relations_of",
                "error": f"Could not resolve '{name}'",
                "results": []
            }
        entity = self.engine.get_entity(entity_id)
        relations = self.engine.get_relations(entity_id, direction="both")
        return {
            "question": original,
            "query_type": "relations_of",
            "entity": entity,
            "relations": relations,
            "count": len(relations)
        }

    def _query_find_entity(self, name: str, original: str) -> Dict[str, Any]:
        """Find an entity by name.

        Returns:
            ``found: True`` with the entity, its relations and up to 5
            observations; otherwise ``found: False`` with up to 5 name-search
            ``suggestions``.
        """
        entity_id = self._resolve_entity(name)
        # An alias may name an ID the engine does not hold; treat that as
        # "not found" instead of reporting found=True with entity=None.
        entity = self.engine.get_entity(entity_id) if entity_id else None
        if entity:
            return {
                "question": original,
                "query_type": "find_entity",
                "found": True,
                "entity": entity,
                "relations": self.engine.get_relations(entity_id, direction="both"),
                "observations": self.engine.get_observations(entity_id, limit=5)
            }
        return {
            "question": original,
            "query_type": "find_entity",
            "found": False,
            "suggestions": self.engine.find_entities(name_contains=name, limit=5)
        }

    def _query_observations(self, name: str, original: str) -> Dict[str, Any]:
        """Get up to 20 observations about an entity."""
        entity_id = self._resolve_entity(name)
        if not entity_id:
            return {
                "question": original,
                "query_type": "observations_of",
                "error": f"Could not resolve '{name}'",
                "results": []
            }
        entity = self.engine.get_entity(entity_id)
        observations = self.engine.get_observations(entity_id, limit=20)
        return {
            "question": original,
            "query_type": "observations_of",
            "entity": entity,
            "observations": observations,
            "count": len(observations)
        }

    def _query_by_type(self, type_name: str, original: str) -> Dict[str, Any]:
        """Find up to 50 entities of a type.

        Known plurals ("people", "agents", ...) are mapped to their singular
        type name; any other word is passed through lower-cased.
        """
        key = type_name.lower()
        normalized_type = self._TYPE_MAP.get(key, key)
        results = self.engine.find_entities(type=normalized_type, limit=50)
        return {
            "question": original,
            "query_type": "find_by_type",
            "type": normalized_type,
            "results": results,
            "count": len(results)
        }

    def _query_family_members(self, original: str) -> Dict[str, Any]:
        """Get family members.

        Scans up to 50 ``person`` entities and keeps those whose ``family``
        property contains "hoffmann" (case-insensitive).

        NOTE(review): the family is hard-coded; the name captured by the
        "family members of ..." pattern is not used - confirm this is intended.
        """
        family = []
        for person in self.engine.find_entities(type='person', limit=50):
            props = _parse_properties(person.get('properties', {}))
            # ``family`` may be missing, None or a non-string value.
            family_name = str(props.get('family') or '')
            if 'hoffmann' in family_name.lower():
                family.append(person)
        return {
            "question": original,
            "query_type": "family_members",
            "family": "Hoffmann",
            "results": family,
            "count": len(family)
        }
# Convenience function
def ask_ontology(question: str) -> str:
    """Answer *question* and return the reply as display text.

    Builds a new ``OntologyNLP`` with no arguments on every call, runs its
    ``ask`` method on the question, and renders the resulting dict with
    ``format_result``.
    """
    answer = OntologyNLP().ask(question)
    return format_result(answer)
def format_result(result: Dict[str, Any]) -> str:
    """Format query result as human-readable text.

    Args:
        result: A result dict as produced by ``OntologyNLP.ask``; its
            ``query_type`` key selects the layout.

    Returns:
        The error message when ``result['error']`` is set; a headed, bulleted
        listing for ``projects_involving``, ``who_works_on``, ``relations_of``,
        ``find_entity``, ``family_members`` and ``find_by_type``; otherwise the
        result dumped as indented JSON.
    """
    # Display glyphs, spelled as escapes so they survive re-encoding of this
    # file (the literals had previously been corrupted, which made the two
    # relation arrows identical).
    cross = "\u274c"            # cross mark
    bullet = "\u2022"           # bullet
    arrow_out = "\u2192"        # rightwards arrow
    arrow_in = "\u2190"         # leftwards arrow
    people_icon = "\U0001F465"  # busts in silhouette
    family_icon = (
        "\U0001F468\u200d\U0001F469\u200d\U0001F467\u200d\U0001F466"  # family (ZWJ sequence)
    )
    # NOTE(review): the four icons below are best-guess restorations - the
    # bytes that distinguished them were lost; confirm against the original.
    projects_icon = "\U0001F4CB"  # clipboard
    link_icon = "\U0001F517"      # link
    entity_icon = "\U0001F50D"    # magnifying glass
    list_icon = "\U0001F4CA"      # bar chart

    query_type = result.get('query_type', 'unknown')

    if result.get('error'):
        return f"{cross} {result['error']}"

    if query_type == 'projects_involving':
        projects = result.get('results', [])
        if not projects:
            return f"No projects found involving {result.get('entity', 'that entity')}."
        lines = [f"{projects_icon} Projects involving {result['entity']}:"]
        for project in projects:
            props = _parse_properties(project.get('properties', {}))
            status = props.get('status', 'unknown')
            lines.append(f" {bullet} {project['name']} ({status})")
        return "\n".join(lines)

    if query_type == 'who_works_on':
        people = result.get('results', [])
        if not people:
            return f"No one found working on {result.get('project', 'that project')}."
        lines = [f"{people_icon} People working on {result['project']}:"]
        for person in people:
            role = person.get('role', 'contributor')
            lines.append(f" {bullet} {person['name']} ({role})")
        return "\n".join(lines)

    if query_type == 'relations_of':
        entity = result.get('entity') or {}
        relations = result.get('relations') or []
        if not entity:
            return "Entity not found."
        lines = [f"{link_icon} Relations of {entity.get('name', 'Unknown')}:"]
        for relation in relations:
            # Anything not marked 'outgoing' (including a missing key) is
            # rendered as incoming.
            outgoing = relation.get('direction') == 'outgoing'
            arrow = arrow_out if outgoing else arrow_in
            other = relation.get('to_name') if outgoing else relation.get('from_name')
            lines.append(f" {arrow} {other} ({relation['relation_type']})")
        return "\n".join(lines)

    if query_type == 'find_entity':
        if result.get('found'):
            entity = result.get('entity')
            if not entity:
                # Same wording as the relations_of branch above.
                return "Entity not found."
            props = _parse_properties(entity.get('properties', {}))
            lines = [f"{entity_icon} {entity['name']}", f"Type: {entity['type']}"]
            lines.extend(f"{key}: {value}" for key, value in props.items())
            return "\n".join(lines)
        suggestions = result.get('suggestions', [])
        if suggestions:
            return f"Not found. Did you mean: {', '.join(s['name'] for s in suggestions)}?"
        return "No matching entities found."

    if query_type == 'family_members':
        family = result.get('results', [])
        if not family:
            return "No family members found."
        lines = [f"{family_icon} {result['family']} Family:"]
        lines.extend(f" {bullet} {member['name']}" for member in family)
        return "\n".join(lines)

    if query_type == 'find_by_type':
        items = result.get('results', [])
        if not items:
            return f"No {result.get('type', 'items')} found."
        lines = [f"{list_icon} All {result.get('type', 'items')} ({len(items)}):"]
        lines.extend(f" {bullet} {item['name']}" for item in items)
        return "\n".join(lines)

    # Default: return JSON (non-serialisable values are stringified).
    return json.dumps(result, indent=2, default=str)
import json
if __name__ == "__main__":
    # Command-line entry point: all arguments are joined into one question,
    # so quoting the question is optional. ``sys`` is imported at file top.
    if len(sys.argv) < 2:
        print("Usage: python -m ontology.natural_language 'what projects is socrates working on?'")
        sys.exit(1)
    question = " ".join(sys.argv[1:])
    print(ask_ontology(question))