"""Natural-language front end for the ontology engine.

Questions are matched against a small ordered list of regular expressions;
the first match selects a query type, which is executed against an
``OntologyEngine`` and can be rendered as text with ``format_result``.
"""

import re
import sys
import json
from pathlib import Path
from typing import List, Dict, Any, Optional

# Handle both module and direct execution
if __name__ == "__main__":
    sys.path.insert(0, str(Path(__file__).parent))
    from engine import OntologyEngine
else:
    try:
        from .engine import OntologyEngine
    except ImportError:
        from engine import OntologyEngine


# Relation types that count as "being involved in" a project.
_WORK_RELATION_TYPES = ('works_on', 'coordinates', 'manages')

# Last-resort name -> entity ID mapping, used when the engine finds nothing.
_ENTITY_ALIASES = {
    'matt': 'person:matt',
    'wadsworth': 'agent:wadsworth',
    'socrates': 'agent:socrates',
    'daedalus': 'agent:daedalus',
    'sully': 'person:sully',
    'harper': 'person:harper',
    'aundrea': 'person:aundrea',
    'icarus': 'project:icarus',
    'costco': 'project:costco_route',
}

# Plural words users type -> singular type names passed to the engine.
_TYPE_PLURALS = {
    'people': 'person',
    'agents': 'agent',
    'projects': 'project',
    'events': 'event',
    'documents': 'document',
}

# Characters removed from the end of a question before pattern matching, so
# that "who works on icarus?" captures "icarus" rather than "icarus?".
_TRAILING_NOISE = "?!. \t\r\n"


def _parse_properties(props) -> Dict[str, Any]:
    """Parse an entity/relation ``properties`` value into a dict.

    Args:
        props: Either an already-decoded dict, a JSON string (the form the
            original code notes comes from SQLite), or a falsy value.

    Returns:
        The decoded dict. ``{}`` is returned for falsy input, for input
        that is not valid JSON, and for valid JSON that is not an object
        (callers use ``.get`` on the result).
    """
    if not props:
        return {}
    if isinstance(props, dict):
        return props
    try:
        parsed = json.loads(props)
    except (TypeError, ValueError):
        # TypeError: not str/bytes; ValueError covers json.JSONDecodeError.
        return {}
    return parsed if isinstance(parsed, dict) else {}


class OntologyNLP:
    """Natural language to ontology query translator."""

    def __init__(self, engine: Optional[OntologyEngine] = None):
        """Create a translator.

        Args:
            engine: Engine to query. A new ``OntologyEngine()`` is created
                when omitted.
        """
        # Explicit None check so a falsy-but-valid engine object is kept.
        self.engine = engine if engine is not None else OntologyEngine()

        # Query patterns: regex → query type.
        # Order matters: ``ask`` uses the first pattern that matches.
        # NOTE: 'path_between' has no handler in ``_execute_query`` yet, so
        # it produces the "Query type not implemented" error result.
        self.patterns = [
            # Projects queries
            (r'what projects (is|are|does|do|involve|include)\s+(\w+)', 'projects_involving'),
            (r'projects involving\s+(\w+)', 'projects_involving'),
            (r'what is\s+(\w+)\s+working on', 'projects_involving'),
            (r'who works on\s+(.+)', 'who_works_on'),
            (r'who is involved in\s+(.+)', 'who_works_on'),

            # Relations queries
            (r'relations of\s+(.+)', 'relations_of'),
            (r'what is connected to\s+(.+)', 'relations_of'),
            (r'how is\s+(.+)\s+related to\s+(.+)', 'path_between'),

            # Family queries (check before entity queries)
            (r'who is in\s+(?:the\s+)?(matt|hoffmann).*family', 'family_members'),
            (r'family members of\s+(.+)', 'family_members'),

            # Entity queries
            (r'tell me about\s+(.+)', 'find_entity'),
            (r'show me\s+(.+)', 'find_entity'),
            (r'(who|what) is\s+(.+)', 'find_entity'),

            # Observation queries
            (r'what do we know about\s+(.+)', 'observations_of'),
            (r'recent info on\s+(.+)', 'observations_of'),

            # Type queries
            (r'show all\s+(.+)', 'find_by_type'),
            (r'list all\s+(.+)', 'find_by_type'),
        ]

    def ask(self, question: str) -> Dict[str, Any]:
        """Answer a natural language question about the ontology.

        The question is lower-cased and stripped of surrounding whitespace
        and trailing ``?``, ``!`` and ``.`` before matching, so punctuation
        does not end up inside captured entity names.

        Args:
            question: The user's question.

        Returns:
            A result dict that always contains ``"question"`` (the original
            text) and ``"query_type"``. When no pattern matches, the result
            has ``query_type == "search_fallback"`` and holds the engine's
            name search for the raw question.
        """
        normalized = question.lower().strip().rstrip(_TRAILING_NOISE)

        # Try pattern matching; first match wins.
        for pattern, query_type in self.patterns:
            match = re.search(pattern, normalized)
            if match:
                return self._execute_query(query_type, match.groups(), question)

        # Fallback: search by name
        return {
            "question": question,
            "query_type": "search_fallback",
            "results": self.engine.find_entities(name_contains=question, limit=5),
            "note": "No specific pattern matched, searching by name"
        }

    def _execute_query(
        self,
        query_type: str,
        groups: tuple,
        original: str
    ) -> Dict[str, Any]:
        """Dispatch a matched pattern to its query method.

        Args:
            query_type: Query type associated with the matched pattern.
            groups: Capture groups of the regex match.
            original: The original question, echoed back in the result.

        Returns:
            The result dict of the selected query, or an error dict with
            ``"error": "Query type not implemented"`` for unknown types.
        """
        first = groups[0] if groups else ""
        # Some patterns capture a verb/pronoun first; the name is then last.
        last = groups[-1] if groups else ""

        if query_type == 'projects_involving':
            return self._query_projects_involving(last, original)
        if query_type == 'who_works_on':
            return self._query_who_works_on(first, original)
        if query_type == 'relations_of':
            return self._query_relations_of(first, original)
        if query_type == 'find_entity':
            return self._query_find_entity(last, original)
        if query_type == 'observations_of':
            return self._query_observations(first, original)
        if query_type == 'find_by_type':
            return self._query_by_type(first, original)
        if query_type == 'family_members':
            return self._query_family_members(original)

        return {
            "question": original,
            "query_type": query_type,
            "error": "Query type not implemented",
            "groups": groups
        }

    def _resolve_entity(self, name: str) -> Optional[str]:
        """Try to resolve a name to an entity ID.

        Resolution order: exact ID lookup, then the engine's name search
        (first hit), then the built-in alias table.

        Args:
            name: Entity ID or (partial) name.

        Returns:
            The entity ID, or ``None`` when nothing matches or ``name`` is
            empty/blank.
        """
        # A blank name must not reach the substring search.
        if not name or not name.strip():
            return None
        name = name.strip()

        # Direct match by ID
        entity = self.engine.get_entity(name)
        if entity:
            return entity['id']

        # Search by name
        results = self.engine.find_entities(name_contains=name, limit=1)
        if results:
            return results[0]['id']

        # Common aliases
        return _ENTITY_ALIASES.get(name.lower())

    def _query_projects_involving(self, name: str, original: str) -> Dict[str, Any]:
        """Find projects involving a person/agent.

        Follows outgoing relations of the resolved entity whose type is one
        of ``works_on``/``coordinates``/``manages`` and keeps targets whose
        ``type`` is ``'project'``.

        Returns:
            A result dict with ``results`` (project entities) and ``count``,
            or an error dict when ``name`` cannot be resolved.
        """
        entity_id = self._resolve_entity(name)
        if not entity_id:
            return {
                "question": original,
                "query_type": "projects_involving",
                "error": f"Could not resolve '{name}' to an entity",
                "results": []
            }

        relations = self.engine.get_relations(entity_id, direction="from")
        projects = []
        for rel in relations:
            if rel['relation_type'] not in _WORK_RELATION_TYPES:
                continue
            project = self.engine.get_entity(rel['to_id'])
            if project and project['type'] == 'project':
                projects.append(project)

        return {
            "question": original,
            "query_type": "projects_involving",
            "entity": name,
            "entity_id": entity_id,
            "results": projects,
            "count": len(projects)
        }

    def _query_who_works_on(self, project_name: str, original: str) -> Dict[str, Any]:
        """Find who works on a project.

        Follows incoming relations of the resolved project whose type is one
        of ``works_on``/``coordinates``/``manages`` and keeps sources whose
        ``type`` is ``'person'`` or ``'agent'``. Each returned entity gets a
        ``role`` key taken from the relation's properties (default
        ``'contributor'``).

        Returns:
            A result dict with ``results`` and ``count``, or an error dict
            when ``project_name`` cannot be resolved.
        """
        project_id = self._resolve_entity(project_name)
        if not project_id:
            return {
                "question": original,
                "query_type": "who_works_on",
                "error": f"Could not resolve project '{project_name}'",
                "results": []
            }

        relations = self.engine.get_relations(project_id, direction="to")
        people = []
        for rel in relations:
            if rel['relation_type'] not in _WORK_RELATION_TYPES:
                continue
            person = self.engine.get_entity(rel['from_id'])
            if person and person['type'] in ('person', 'agent'):
                props = _parse_properties(rel.get('properties', {}))
                people.append({
                    **person,
                    "role": props.get('role', 'contributor')
                })

        return {
            "question": original,
            "query_type": "who_works_on",
            "project": project_name,
            "project_id": project_id,
            "results": people,
            "count": len(people)
        }

    def _query_relations_of(self, name: str, original: str) -> Dict[str, Any]:
        """Get all relations (both directions) of an entity.

        Returns:
            A result dict with ``entity``, ``relations`` and ``count``, or
            an error dict when ``name`` cannot be resolved.
        """
        entity_id = self._resolve_entity(name)
        if not entity_id:
            return {
                "question": original,
                "query_type": "relations_of",
                "error": f"Could not resolve '{name}'",
                "results": []
            }

        entity = self.engine.get_entity(entity_id)
        relations = self.engine.get_relations(entity_id, direction="both")

        return {
            "question": original,
            "query_type": "relations_of",
            "entity": entity,
            "relations": relations,
            "count": len(relations)
        }

    def _query_find_entity(self, name: str, original: str) -> Dict[str, Any]:
        """Find an entity by name.

        Returns:
            When resolved: ``found=True`` with the entity, its relations in
            both directions and up to 5 observations. Otherwise
            ``found=False`` with up to 5 name-search ``suggestions``.
        """
        entity_id = self._resolve_entity(name)

        if entity_id:
            return {
                "question": original,
                "query_type": "find_entity",
                "found": True,
                "entity": self.engine.get_entity(entity_id),
                "relations": self.engine.get_relations(entity_id, direction="both"),
                "observations": self.engine.get_observations(entity_id, limit=5)
            }

        # Fallback: search
        results = self.engine.find_entities(name_contains=name, limit=5)
        return {
            "question": original,
            "query_type": "find_entity",
            "found": False,
            "suggestions": results
        }

    def _query_observations(self, name: str, original: str) -> Dict[str, Any]:
        """Get up to 20 observations about an entity.

        Returns:
            A result dict with ``entity``, ``observations`` and ``count``,
            or an error dict when ``name`` cannot be resolved.
        """
        entity_id = self._resolve_entity(name)
        if not entity_id:
            return {
                "question": original,
                "query_type": "observations_of",
                "error": f"Could not resolve '{name}'",
                "results": []
            }

        entity = self.engine.get_entity(entity_id)
        observations = self.engine.get_observations(entity_id, limit=20)

        return {
            "question": original,
            "query_type": "observations_of",
            "entity": entity,
            "observations": observations,
            "count": len(observations)
        }

    def _query_by_type(self, type_name: str, original: str) -> Dict[str, Any]:
        """Find up to 50 entities of a type.

        Known plurals (``people``, ``agents``, ``projects``, ``events``,
        ``documents``) are mapped to their singular type name; any other
        value is lower-cased and passed through unchanged.
        """
        lowered = type_name.lower()
        normalized_type = _TYPE_PLURALS.get(lowered, lowered)

        results = self.engine.find_entities(type=normalized_type, limit=50)

        return {
            "question": original,
            "query_type": "find_by_type",
            "type": normalized_type,
            "results": results,
            "count": len(results)
        }

    def _query_family_members(self, original: str) -> Dict[str, Any]:
        """Get family members.

        Scans up to 50 ``person`` entities and keeps those whose ``family``
        property contains ``'hoffmann'`` (case-insensitive). The family is
        fixed: the name captured by the matching pattern is not used.
        """
        results = self.engine.find_entities(type='person', limit=50)
        family = []
        for person in results:
            props = _parse_properties(person.get('properties', {}))
            # ``family`` may be missing, None or a non-string value.
            family_name = str(props.get('family') or '')
            if 'hoffmann' in family_name.lower():
                family.append(person)

        return {
            "question": original,
            "query_type": "family_members",
            "family": "Hoffmann",
            "results": family,
            "count": len(family)
        }


# Convenience function
def ask_ontology(question: str) -> str:
    """Ask the ontology a question, get a human-readable answer.

    Builds a fresh ``OntologyNLP`` (and therefore a default engine) on
    every call.
    """
    nlp = OntologyNLP()
    result = nlp.ask(question)
    return format_result(result)


def format_result(result: Dict[str, Any]) -> str:
    """Format a query result dict as human-readable text.

    Args:
        result: A dict as returned by ``OntologyNLP.ask``.

    Returns:
        A short text rendering. Results carrying an ``error`` are rendered
        as that error; query types without a dedicated layout are dumped as
        indented JSON.
    """
    query_type = result.get('query_type', 'unknown')

    if result.get('error'):
        return f"❓ {result['error']}"

    if query_type == 'projects_involving':
        projects = result.get('results', [])
        if not projects:
            return f"No projects found involving {result.get('entity', 'that entity')}."

        lines = [f"📋 Projects involving {result['entity']}:"]
        for p in projects:
            props = _parse_properties(p.get('properties', {}))
            status = props.get('status', 'unknown')
            lines.append(f"  • {p['name']} ({status})")
        return "\n".join(lines)

    elif query_type == 'who_works_on':
        people = result.get('results', [])
        if not people:
            return f"No one found working on {result.get('project', 'that project')}."

        lines = [f"👥 People working on {result['project']}:"]
        for p in people:
            role = p.get('role', 'contributor')
            lines.append(f"  • {p['name']} ({role})")
        return "\n".join(lines)

    elif query_type == 'relations_of':
        entity = result.get('entity', {})
        relations = result.get('relations', [])
        if not entity:
            return "Entity not found."

        lines = [f"🔗 Relations of {entity.get('name', 'Unknown')}:"]
        for r in relations:
            outgoing = r['direction'] == 'outgoing'
            direction = "→" if outgoing else "←"
            # Show the entity at the far end of the relation.
            other = r.get('to_name') if outgoing else r.get('from_name')
            lines.append(f"  {direction} {other} ({r['relation_type']})")
        return "\n".join(lines)

    elif query_type == 'find_entity':
        if result.get('found'):
            entity = result['entity']
            props = _parse_properties(entity.get('properties', {}))
            lines = [f"📌 {entity['name']}", f"Type: {entity['type']}"]
            if props:
                for k, v in props.items():
                    lines.append(f"{k}: {v}")
            return "\n".join(lines)
        else:
            suggestions = result.get('suggestions', [])
            if suggestions:
                return f"Not found. Did you mean: {', '.join(s['name'] for s in suggestions)}?"
            return "No matching entities found."

    elif query_type == 'family_members':
        family = result.get('results', [])
        if not family:
            return "No family members found."

        lines = [f"👨‍👩‍👧‍👦 {result['family']} Family:"]
        for member in family:
            lines.append(f"  • {member['name']}")
        return "\n".join(lines)

    elif query_type == 'find_by_type':
        items = result.get('results', [])
        if not items:
            return f"No {result.get('type', 'items')} found."

        lines = [f"📂 All {result.get('type', 'items')} ({len(items)}):"]
        for item in items:
            lines.append(f"  • {item['name']}")
        return "\n".join(lines)

    # Default: return JSON
    return json.dumps(result, indent=2, default=str)


if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("Usage: python -m ontology.natural_language 'what projects is socrates working on?'")
        sys.exit(1)

    question = " ".join(sys.argv[1:])
    print(ask_ontology(question))