# location_cache.py (22,481 bytes, Apr 18, 2026)

"""Location Cache — Local known-locations cache for goplaces.

Prevents redundant Google Places API calls by caching resolved locations
to a local JSON file. Known locations (school, pediatrician, etc.) resolve
instantly from cache. New locations are fetched via goplaces, formatted,
and saved for future use.

Architecture:
- Cache stored as JSON in known_locations.json under LOCATION_CACHE_DIR
  (default: ~/.family_assistant/locations); keep it out of version control
- Keyed by normalized location string (lowercase, stripped)
- Each entry: name, address, place_id, lat, lng, types, travel_time_min, cached_at
- Home coordinates from family.yaml → home section
- Search radius biased around home for local results
- TTL: 90 days (school-year context; re-verify annually)

Integration points:
- Hermes: enrich event messages with travel time
- Daily brief: add "15 min from home" context to unfamiliar locations
- Intent engine: resolve location strings from chat
- Clicker: add travel context to signup slots
"""

import hashlib
import json
import os
import re
import subprocess
import sys
from datetime import datetime
from pathlib import Path
from zoneinfo import ZoneInfo

from family_assistant.config import CHICAGO_TZ, load_family_config

# ---------------------------------------------------------------------------
# Config
# ---------------------------------------------------------------------------

# Directory holding the cache file. The LOCATION_CACHE_DIR environment
# variable overrides the per-user default.
CACHE_DIR = Path(
    os.getenv(
        "LOCATION_CACHE_DIR",
        str(Path.home().joinpath(".family_assistant", "locations")),
    )
)
CACHE_FILE = CACHE_DIR.joinpath("known_locations.json")
CACHE_TTL_DAYS = 90  # Cached entries are re-fetched once they reach this age
DEFAULT_RADIUS_M = 16_000  # ~10 miles — search bias radius around home

# ---------------------------------------------------------------------------
# Home coordinates
# ---------------------------------------------------------------------------

_home_cache = None


def get_home():
    """Return the home location from family.yaml, memoized after first load.

    Returns:
        Dict with keys ``lat`` and ``lng`` (floats), ``address`` (defaults
        to "") and ``label`` (defaults to "Home").

    Raises:
        KeyError: If the ``home`` section is missing or empty, or has no
            ``lat``/``lng`` entry.
        ValueError: If ``home`` is not a mapping or a coordinate is not
            numeric.
    """
    global _home_cache
    if _home_cache is not None:
        return _home_cache

    home = load_family_config().get("home")
    if not home:
        raise KeyError(
            "No 'home' section in family.yaml. Add:\n"
            "home:\n"
            "  address: '123 Main St, Green Bay, WI 54304'\n"
            "  lat: 44.5126\n"
            "  lng: -88.0133\n"
        )

    try:
        resolved = {
            "lat": float(home["lat"]),
            "lng": float(home["lng"]),
            "address": home.get("address", ""),
            "label": home.get("label", "Home"),
        }
    except (TypeError, AttributeError) as exc:
        # Callers in this module catch (KeyError, ValueError) only. A null
        # coordinate or a non-mapping `home` would otherwise surface as a
        # TypeError/AttributeError and escape them.
        raise ValueError(
            f"Invalid 'home' section in family.yaml: {exc}"
        ) from exc

    _home_cache = resolved
    return _home_cache

# ---------------------------------------------------------------------------
# Cache I/O
# ---------------------------------------------------------------------------

def _normalize_key(location_str):
"""Normalize a location string for cache lookup.

Lowercase, strip, collapse whitespace, remove trailing punctuation.
"""
s = location_str.lower().strip()
s = re.sub(r"\s+", " ", s)
s = re.sub(r"[,.]$", "", s)
return s

def _load_cache():
    """Load the known-locations cache from disk.

    Returns:
        Dict keyed by normalized location string. An empty dict is returned
        when the file does not exist, cannot be read or decoded, or does not
        contain a JSON object at the top level.
    """
    try:
        with open(CACHE_FILE, "r", encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        # No cache yet — the normal first-run state, not worth reporting.
        return {}
    except (OSError, ValueError) as exc:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        print(
            f"  [LocationCache] could not read cache {CACHE_FILE}: {exc}",
            file=sys.stderr,
        )
        return {}
    return data if isinstance(data, dict) else {}

def _save_cache(cache):
    """Write the cache dict to disk, creating the directory if needed.

    The JSON is written to a temporary sibling file and then moved over
    CACHE_FILE with ``os.replace``. An interrupted write therefore leaves the
    previous cache intact rather than a truncated file, which the loader
    would treat as an empty cache.

    Args:
        cache: Dict of cache entries keyed by normalized location string.
            Values that are not JSON-serializable are stringified
            (``default=str``).

    Raises:
        OSError: If the directory or file cannot be written.
    """
    CACHE_DIR.mkdir(parents=True, exist_ok=True)
    tmp_path = CACHE_FILE.with_name(f"{CACHE_FILE.name}.{os.getpid()}.tmp")
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(cache, f, indent=2, default=str)
        os.replace(tmp_path, CACHE_FILE)
    finally:
        # After a successful replace the temp file is already gone; this
        # only cleans up after a failed dump.
        tmp_path.unlink(missing_ok=True)

# ---------------------------------------------------------------------------
# goplaces CLI
# ---------------------------------------------------------------------------

def _run_goplaces(args, timeout=15):
"""Run goplaces CLI with given arguments. Returns parsed JSON or None."""
cmd = ["goplaces"] + args + ["--json"]
try:
result = subprocess.run(
cmd,
capture_output=True,
text=True,
timeout=timeout,
)
if result.returncode != 0:
print(f" [LocationCache] goplaces error: {result.stderr.strip()}", file=sys.stderr)
return None

    output = result.stdout.strip()
    if not output:
        return None

    data = json.loads(output)

    # goplaces returns a dict with "results" list or a single result
    if isinstance(data, dict) and "results" in data:
        return data["results"]
    elif isinstance(data, list):
        return data
    elif isinstance(data, dict) and "id" in data:
        return [data]  # single place result
    return None

except subprocess.TimeoutExpired:
    print("  [LocationCache] goplaces timeout", file=sys.stderr)
    return None
except json.JSONDecodeError:
    print("  [LocationCache] goplaces returned non-JSON", file=sys.stderr)
    return None
except FileNotFoundError:
    print("  [LocationCache] goplaces not found โ€” install with: brew install steipete/tap/goplaces", file=sys.stderr)
    return None

def _compute_travel_time(home_lat, home_lng, dest_lat, dest_lng):
"""Get actual driving time from home to destination via goplaces directions.

Falls back to haversine estimation if directions API fails.
Returns travel time in minutes.
"""
# Try real directions first
try:
    result = subprocess.run(
        [
            "goplaces", "directions",
            "--from-lat", str(home_lat),
            "--from-lng=" + str(home_lng),
            "--to-lat", str(dest_lat),
            "--to-lng=" + str(dest_lng),
            "--mode=drive",
            "--json",
        ],
        capture_output=True,
        text=True,
        timeout=10,
    )
    if result.returncode == 0 and result.stdout.strip():
        data = json.loads(result.stdout.strip())
        # goplaces directions returns duration in seconds
        duration_sec = None
        if isinstance(data, dict):
            # Try common response structures
            duration_sec = (
                data.get("duration_seconds")
                or data.get("duration")
                or (data.get("legs", [{}])[0].get("duration", {}).get("value"))
            )
            if duration_sec is None and "routes" in data and data["routes"]:
                leg = data["routes"][0].get("legs", [{}])[0]
                duration_sec = leg.get("duration", {}).get("value")
        elif isinstance(data, list) and data:
            duration_sec = data[0].get("duration_seconds") or data[0].get("duration")

        if duration_sec and isinstance(duration_sec, (int, float)):
            return round(duration_sec / 60)  # Convert seconds to minutes
except (subprocess.TimeoutExpired, json.JSONDecodeError, Exception):
    pass  # Fall through to haversine estimate

# Fallback: haversine estimation
from math import asin, cos, radians, sin, sqrt

lat1, lon1 = radians(home_lat), radians(home_lng)
lat2, lon2 = radians(dest_lat), radians(dest_lng)

dlat = lat2 - lat1
dlon = lon2 - lon1

a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
c = 2 * asin(sqrt(a))

miles = 3958.8 * c
road_miles = miles * 1.4  # Suburban road factor
minutes = (road_miles / 35) * 60  # 35 mph average

return round(minutes)

def _nominatim_search(location_str, timeout=10):
    """Geocode a location with Nominatim (OpenStreetMap) and cache the result.

    Free and keyless; used as the fallback when goplaces yields nothing.
    Nominatim's usage policy asks clients to stay at or below roughly one
    request per second; this function does not throttle itself.

    Args:
        location_str: Human-readable location text.
        timeout: Seconds to wait for the HTTP response.

    Returns:
        A result dict in the cache-entry format (with ``source`` set to
        ``"nominatim"``), or None if nothing was found or any error occurred.
        Errors are reported on stderr rather than raised.
    """
    import urllib.parse
    import urllib.request

    try:
        # Bias toward the home area only when a home is configured.
        # NOTE(review): the suffix is hard-coded rather than derived from
        # the configured home address — confirm this is intended.
        home_suffix = ""
        try:
            get_home()
            home_suffix = " Green Bay, WI"
        except (KeyError, ValueError):
            pass

        query = location_str.strip() + home_suffix
        url = (
            f"https://nominatim.openstreetmap.org/search?"
            f"q={urllib.parse.quote(query)}&format=json&limit=1&addressdetails=1"
        )

        request = urllib.request.Request(url, headers={
            "User-Agent": "hoffdesk-family-assistant/1.0",
        })
        with urllib.request.urlopen(request, timeout=timeout) as resp:
            data = json.loads(resp.read().decode())

        if not data or not isinstance(data, list):
            return None

        item = data[0]
        raw_lat, raw_lng = item.get("lat"), item.get("lon")
        # Test for absence, not falsiness: 0.0 is a valid coordinate.
        if raw_lat is None or raw_lng is None:
            return None
        lat, lng = float(raw_lat), float(raw_lng)

        # The first component of display_name serves as the short name.
        display_name = item.get("display_name", "")
        name = display_name.split(",")[0].strip() if display_name else location_str

        # Prefer a compact address built from the structured details.
        addr = item.get("address", {})
        address_parts = []
        if addr.get("road"):
            house_number = addr.get("house_number", "")
            address_parts.append(f"{house_number} {addr['road']}".strip())
        address_parts.extend(
            addr[field] for field in ("city", "state", "postcode") if addr.get(field)
        )
        address = ", ".join(address_parts) if address_parts else display_name

        # Store `types` as a list so it has the same shape as entries built
        # from goplaces results; "class" is read here as a single value.
        place_class = item.get("class")
        if isinstance(place_class, str):
            types = [place_class] if place_class else []
        elif isinstance(place_class, (list, tuple)):
            types = list(place_class)
        else:
            types = []

        key = _normalize_key(location_str)

        result = {
            "key": key,
            "original_query": location_str,
            "name": name,
            "address": address,
            "place_id": item.get("place_id", ""),
            "lat": lat,
            "lng": lng,
            "types": types,
            "phone": None,
            "website": None,
            "rating": None,
            "cached_at": datetime.now(CHICAGO_TZ).isoformat(),
            "from_cache": False,
            "source": "nominatim",
        }

        # Travel context is optional: skip it when no home is configured.
        try:
            home = get_home()
            result["travel_time_min"] = _compute_travel_time(home["lat"], home["lng"], lat, lng)
            # Back-computed from the minutes using the 35 mph / 1.4 road
            # factor of the haversine estimate.
            result["distance_mi"] = round(result["travel_time_min"] / 60 * 35 / 1.4, 1)
        except (KeyError, ValueError):
            pass

        # Save to cache for future lookups.
        cache = _load_cache()
        cache[key] = result
        _save_cache(cache)

        return result

    except Exception as e:
        # Deliberately broad: this is a best-effort fallback, so any network,
        # decoding or payload-shape problem is logged and reported as
        # "not found" instead of propagating.
        print(f"  [LocationCache] Nominatim error: {e}", file=sys.stderr)
        return None

# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------

_MIN_FUZZY_KEY_LEN = 4  # Shortest key allowed to take part in prefix matching


def _is_fresh(entry):
    """Return True if a cache entry has a parseable timestamp younger than the TTL."""
    cached_at = entry.get("cached_at") if isinstance(entry, dict) else None
    if not cached_at:
        return False
    try:
        age_days = (datetime.now(CHICAGO_TZ) - datetime.fromisoformat(cached_at)).days
    except (ValueError, TypeError):
        # Malformed timestamp, or a naive one that cannot be subtracted from
        # an aware "now" — treat as stale so the entry is re-fetched.
        return False
    return age_days < CACHE_TTL_DAYS


def resolve(location_str, use_home_bias=True, force_refresh=False):
    """Resolve a location string to a structured place dict.

    1. Check the local cache for an exact normalized-key match.
    2. Otherwise try a prefix match against cached keys.
    3. Otherwise (or with force_refresh) query goplaces, falling back to
       Nominatim when goplaces returns nothing.
    4. Save a freshly fetched result to the cache.

    Args:
        location_str: Human-readable location (e.g., "Aurora BayCare", "Golrusk Pet Center")
        use_home_bias: If True, bias the search around the home coordinates
        force_refresh: If True, skip the cache and re-fetch from the API

    Returns:
        Dict with keys:
          - name: Business/place name
          - address: Full street address
          - place_id: Place ID from the provider
          - lat, lng: Coordinates
          - types: Place types (e.g., ["doctor", "hospital"])
          - travel_time_min: Estimated driving time from home (minutes);
            only present when home and coordinates are known
          - distance_mi: Estimated distance from home (miles); same condition
          - from_cache: Whether the result came from the local cache
          - fuzzy_match: True only when served through a prefix match
          - cached_at: ISO timestamp when cached
        None if the query is blank or the location cannot be resolved.
    """
    key = _normalize_key(location_str)
    if not key:
        return None  # Blank query: nothing to look up or cache

    cache = _load_cache()

    if not force_refresh:
        if key in cache:
            entry = cache[key]
            if _is_fresh(entry):
                entry["from_cache"] = True
                return entry
            # Stale or malformed exact match: fall through and re-fetch.
        elif len(key) >= _MIN_FUZZY_KEY_LEN:
            # Prefix match: a partial query ("Golrusk") is served from the
            # cached full name ("golrusk pet care center"), and vice versa.
            # The length floor applies to BOTH sides, so a very short cached
            # key cannot capture every longer query that starts with it.
            for cached_key, cached_entry in cache.items():
                if len(cached_key) < _MIN_FUZZY_KEY_LEN:
                    continue
                if not (cached_key.startswith(key) or key.startswith(cached_key)):
                    continue
                if _is_fresh(cached_entry):
                    cached_entry["from_cache"] = True
                    cached_entry["fuzzy_match"] = True
                    return cached_entry

    # Not in cache or expired — call goplaces.
    search_args = ["search", location_str, "--limit", "1"]

    if use_home_bias:
        try:
            home = get_home()
            search_args.extend([
                # `=` syntax avoids negative-number flag parsing.
                "--lat=" + str(home["lat"]),
                "--lng=" + str(home["lng"]),
                "--radius-m", str(DEFAULT_RADIUS_M),
            ])
        except (KeyError, ValueError):
            pass  # No home config — search without bias

    results = _run_goplaces(search_args)
    place = None
    if isinstance(results, list) and results and isinstance(results[0], dict):
        place = results[0]  # Top result

    # Fallback: Nominatim (OpenStreetMap) — free, no API key needed.
    # It caches its own result, so just return what it produces.
    if place is None:
        return _nominatim_search(location_str)

    # Merge selected detail fields when a place ID is available.
    place_id = place.get("id") or place.get("place_id")
    if place_id:
        details = _run_goplaces(["details", place_id], timeout=10)
        if isinstance(details, list) and details and isinstance(details[0], dict):
            detail = details[0]
            for field in ("formatted_address", "formatted_phone_number", "website",
                          "opening_hours", "rating", "user_ratings_total"):
                if field in detail:
                    place[field] = detail[field]

    # Coordinates may be flat, under "location", or under
    # "geometry" -> "location"; try each in turn.
    loc = place.get("location") or {}
    geometry_loc = (place.get("geometry") or {}).get("location") or {}
    lat = place.get("lat") or loc.get("lat") or geometry_loc.get("lat")
    lng = place.get("lng") or loc.get("lng") or geometry_loc.get("lng")
    address = place.get("address") or place.get("formatted_address") or place.get("vicinity", "")

    result = {
        "key": key,
        "original_query": location_str,
        "name": place.get("name") or (place.get("displayName") or {}).get("text", ""),
        "address": address,
        "place_id": place_id,
        "lat": lat,
        "lng": lng,
        "types": place.get("types", []),
        "phone": place.get("formatted_phone_number"),
        "website": place.get("website"),
        "rating": place.get("rating"),
        "cached_at": datetime.now(CHICAGO_TZ).isoformat(),
        "from_cache": False,
    }

    # Travel context is optional enrichment; skip it if it cannot be computed.
    if lat is not None and lng is not None:
        try:
            home = get_home()
            result["travel_time_min"] = _compute_travel_time(home["lat"], home["lng"], lat, lng)
            # Back-computed from the minutes using the 35 mph / 1.4 road
            # factor of the haversine estimate.
            result["distance_mi"] = round(result["travel_time_min"] / 60 * 35 / 1.4, 1)
        except (KeyError, ValueError, TypeError):
            pass  # No usable home config or non-numeric coordinates

    cache[key] = result
    _save_cache(cache)

    return result

def resolve_batch(location_strings, use_home_bias=True, force_refresh=False):
    """Resolve multiple locations, preserving input order.

    Args:
        location_strings: Iterable of human-readable location strings.
        use_home_bias: Passed through to resolve().
        force_refresh: Passed through to resolve().

    Returns:
        List of ``(input, result)`` tuples, one per input. Unresolvable
        locations are kept, with ``None`` as the result, so the output
        always lines up with the input.
    """
    return [
        (loc, resolve(loc, use_home_bias=use_home_bias, force_refresh=force_refresh))
        for loc in location_strings
    ]

def format_travel_info(location_result):
    """Format a location result as a concise travel string.

    The place name (if any) comes first, followed by the travel time, or by
    the distance when no travel time is available. Parts are joined with
    " · ".

    E.g., "Aurora BayCare · 15 min from home" or "2.3 mi away"

    Args:
        location_result: Dict as returned by resolve(), or None.

    Returns:
        The formatted string; "" when there is nothing to show.
    """
    if not location_result:
        return ""

    parts = []
    name = location_result.get("name", "")
    if name:
        parts.append(name)

    travel = location_result.get("travel_time_min")
    distance = location_result.get("distance_mi")
    if travel is not None:
        parts.append(f"{travel} min from home")
    elif distance is not None:
        parts.append(f"{distance} mi away")

    return " · ".join(parts)

def format_location_enrichment(location_result):
    """Format a location result as a rich one-line enrichment string.

    Includes, in order and when present: name, address (omitted if identical
    to the name), travel time, and phone number. Parts are joined with " · ".

    E.g., "Aurora BayCare · 123 Main St, Green Bay · 15 min from home · 📞 920-555-0100"

    Args:
        location_result: Dict as returned by resolve(), or None.

    Returns:
        The formatted string; "" when there is nothing to show.
    """
    if not location_result:
        return ""

    name = location_result.get("name")
    address = location_result.get("address")
    travel = location_result.get("travel_time_min")
    phone = location_result.get("phone")

    parts = []
    if name:
        parts.append(name)
    if address and address != name:
        parts.append(address)
    if travel is not None:
        parts.append(f"{travel} min from home")
    if phone:
        parts.append(f"📞 {phone}")

    return " · ".join(parts)

def purge_expired(days=None):
    """Remove expired entries from the location cache.

    An entry is purged when its age in whole days is at least ``days`` (the
    same boundary resolve() uses to stop serving an entry), or when it has
    no ``cached_at`` timestamp or an unparseable one. The cache file is
    rewritten only if something was removed.

    Args:
        days: Age threshold in days. Defaults to CACHE_TTL_DAYS.

    Returns:
        Number of entries purged.
    """
    if days is None:
        days = CACHE_TTL_DAYS

    cache = _load_cache()
    if not cache:
        return 0

    now = datetime.now(CHICAGO_TZ)

    def is_expired(entry):
        cached_at = entry.get("cached_at") if isinstance(entry, dict) else None
        if not cached_at:
            return True
        try:
            return (now - datetime.fromisoformat(cached_at)).days >= days
        except (ValueError, TypeError):
            # Unparseable or timezone-naive timestamp: cannot be trusted.
            return True

    kept = {key: entry for key, entry in cache.items() if not is_expired(entry)}
    purged = len(cache) - len(kept)

    if purged > 0:
        _save_cache(kept)

    return purged

def stats():
    """Return cache statistics.

    Returns:
        Dict with:
          - total_entries: Number of cached entries
          - cached_locations: Entry names (falling back to the cache key)
          - expired_entries: Entries whose age has reached CACHE_TTL_DAYS
          - with_travel_time: Entries that carry a travel_time_min value
          - oldest_entry, newest_entry: ISO timestamps, or None when no
            entry has a parseable timestamp (including an empty cache)
    """
    cache = _load_cache()
    if not cache:
        return {
            "total_entries": 0,
            "cached_locations": [],
            "expired_entries": 0,
            "with_travel_time": 0,
            # Always present, so callers can index the result uniformly.
            "oldest_entry": None,
            "newest_entry": None,
        }

    now = datetime.now(CHICAGO_TZ)
    expired = 0
    with_travel = 0
    names = []
    oldest = newest = None

    for key, entry in cache.items():
        if not isinstance(entry, dict):
            # Malformed entry: still counted in the total, listed by key.
            names.append(key)
            continue

        cached_at = entry.get("cached_at")
        if cached_at:
            try:
                cached_dt = datetime.fromisoformat(cached_at)
                # Same boundary as resolve(): age >= TTL means expired.
                if (now - cached_dt).days >= CACHE_TTL_DAYS:
                    expired += 1
                if oldest is None or cached_dt < oldest:
                    oldest = cached_dt
                if newest is None or cached_dt > newest:
                    newest = cached_dt
            except (ValueError, TypeError):
                pass  # Unparseable timestamp: leave it out of the date stats

        if entry.get("travel_time_min") is not None:
            with_travel += 1

        names.append(entry.get("name") or key)

    return {
        "total_entries": len(cache),
        "cached_locations": names,
        "expired_entries": expired,
        "with_travel_time": with_travel,
        "oldest_entry": oldest.isoformat() if oldest else None,
        "newest_entry": newest.isoformat() if newest else None,
    }

def seed_known_locations():
    """Pre-seed common family locations from family.yaml.

    Reads the 'known_locations' section from family.yaml and resolves each
    entry through resolve(), which stores fresh results in the cache. A
    status line is printed for each query.

    family.yaml format (a bare string per item is accepted as well):
      known_locations:
        - query: "Golrusk Pet Center"
        - query: "Aurora BayCare Green Bay"

    Returns:
        List of (query, result) tuples; result is None for a query that
        could not be resolved. Entries without a query are skipped.
    """
    config = load_family_config()
    known = config.get("known_locations", [])

    if not known:
        print("  [LocationCache] No known_locations in family.yaml — nothing to seed")
        return []

    results = []
    for loc in known:
        if isinstance(loc, str):
            query = loc
        elif isinstance(loc, dict):
            query = loc.get("query", "")
        else:
            query = ""
        if not query:
            continue

        result = resolve(query, use_home_bias=True)
        results.append((query, result))
        status = "✓" if result else "✗"
        name = result.get("name", query) if result else query
        print(f"  {status} {query} → {name}")

    return results