"""Tripwire — sub-10ms regex signal detection for coordination signals. Tier 1 of the two-tier extraction pipeline. Fires on messages containing coordination intent, time references, family logistics, or event signals. Output: TripwireResult with score, matched patterns, and any coordination keywords found. Design (v2.1 — 2026-05-08 calibration): - Time signals: capped at max() to prevent Chronometer Spam - Base patterns: 0.10–0.25 per match - Family member presence: boosts coordination signal - Coordination keywords: 0.08 each (cap 0.24) - Fire threshold: 0.35 (lowered from 0.4 for better recall) """ import re from dataclasses import dataclass, field from typing import Optional # --------------------------------------------------------------------------- # Patterns # --------------------------------------------------------------------------- PATTERNS = { # Time/schedule indicators "time_specific": re.compile( r"\b(\d{1,2}:\d{2}\s*(AM|PM|am|pm)?|\d{1,2}\s*(AM|PM|am|pm))\b" r"|\b(morning|afternoon|evening|tonight)\b" ), # Bare hour ("at 6", "by 3", "around 5") — no colon or AM/PM needed "hour_only": re.compile( r"\b(at|by|around|before|after)\s+(\d{1,2})\b" ), "day_reference": re.compile( r"\b(today|tomorrow|tonight|yesterday|" r"monday|tuesday|wednesday|thursday|friday|saturday|sunday)\b" ), "vague_time": re.compile(r"\b(this week|next week|this weekend|next weekend)\b"), "date_reference": re.compile( r"\b(\d{1,2}/\d{1,2}|\d{1,2}-\d{1,2}|" r"\b(january|february|march|april|may|june|july|august|" r"september|october|november|december)\s+\d{1,2})\b" ), # High-signal coordination keywords (single pattern, higher weight) "pickup_dropoff": re.compile(r"\b(pick ?up|drop ?off)\b"), "can_you": re.compile(r"\b(can you|could you|would you|will you|are you able to)\b"), "need_someone": re.compile(r"\b(need someone|need a|looking for|anyone available|who can)\b"), "coverage_offer": re.compile( r"\b(i can (take|grab|do)|i(?:'ll| will) (take|grab|do)|" r"let me (take|grab|do)|i could take|i'll handle)\b" ), "swap_change": re.compile(r"\b(swap|switch|reschedule)\b"), # Appointment / medical / school events — high signal "appointment": re.compile( r"\b(appointment|checkup|appt|visit|screening|consultation|procedure)\b" ), "medical": re.compile( r"\b(doctor|dentist|pediatrician|orthodontist|" r"therapy|OT|PT|speech|eye|vision|hearing)\b" ), "school_event": re.compile( r"\b(conference|parent-teacher|school|practice|game|match|" r"recital|concert|performance|ceremony|party|field trip)\b" ), "lesson_class": re.compile( r"\b(lesson|class|tutor|dance|swim|soccer|baseball|" r"basketball|gymnastics|karate|piano)\b" ), # Family name mentions — boost when combined with other signals "name_mention": re.compile( r"\b(Sully|Sullivan|Harper|Aundrea|Matt|Maggie)\b", flags=re.IGNORECASE ), "possessive_name": re.compile( r"\b(\w+'s)\s+(appointment|practice|game|dance|lesson|party|" r"checkup|visit|haircut|recital|concert)\b" ), # Coordination intent "implicit_assign": re.compile( r"\b(is going (with|to)|are going (with|to)|" r"going with (you|me|us)|has to (go|be|get)|needs to (go|be|get))\b" ), "suggestion_intent": re.compile( r"\b(we should|let's|how about|i was thinking|" r"i'd like to|we could|we need to|we have to)\b" ), # Event recall / confirmation "event_recall": re.compile( r"\b(remember we have|don't forget (we|that)|" r"are we still on for|looking forward to)\b" ), "event_ack": re.compile( r"\b(yes, i have|i'll be there|we're going to|see you at)\b" ), # Meal/social — lower signal "meal_meeting": re.compile( r"\b(breakfast|lunch|brunch|dinner|drinks|coffee|" r"meet up|hang out|get together)\b" ), # Offer/help — lower signal "offer_help": re.compile( r"\b(do you want me to|should i|i'll take|i'll get|" r"let me know|i could take)\b" ), # Location — moderate signal "location": re.compile( r"\b(hospital|vet|clinic|office|gym|field|pool|" r"church|library|park|school)\b" ), } TIME_SIGNALS = {"time_specific", "day_reference", "date_reference", "vague_time", "hour_only"} # Base scores per pattern — calibrated v2.1 PATTERN_SCORES = { "time_specific": 0.15, "hour_only": 0.10, "day_reference": 0.12, "date_reference": 0.12, "vague_time": 0.08, "pickup_dropoff": 0.25, "can_you": 0.25, "need_someone": 0.25, "coverage_offer": 0.20, "swap_change": 0.20, "appointment": 0.15, "medical": 0.12, "school_event": 0.12, "lesson_class": 0.10, "name_mention": 0.08, "possessive_name": 0.12, "implicit_assign": 0.20, "suggestion_intent": 0.12, "event_recall": 0.20, "event_ack": 0.12, "meal_meeting": 0.08, "offer_help": 0.12, "location": 0.08, } # Coordination keywords for combo bonus COORDINATION_KEYWORDS = [ "can you", "could you", "would you", "will you", "need someone", "pick up", "drop off", "cover", "watch", "stay with", "babysit", "fill in", "swap", "switch", "reschedule", "who can", ] FIRE_THRESHOLD = 0.35 # v2.1 — lowered from 0.4 for better recall on borderline messages @dataclass class TripwireResult: fired: bool = False score: float = 0.0 patterns_matched: list[str] = field(default_factory=list) has_time_signal: bool = False has_coordination_keyword: bool = False matched_text: str = "" def run_tripwire(text: str) -> TripwireResult: """Run regex tripwire against text. Returns result with score and matches.""" text_lower = text.lower() matched_patterns = [] score = 0.0 time_score_contrib = 0.0 for name, pattern in PATTERNS.items(): if pattern.search(text_lower): matched_patterns.append(name) base = PATTERN_SCORES.get(name, 0.10) if name in TIME_SIGNALS: time_score_contrib = max(time_score_contrib, base) else: score += base # Cap time signal contribution (single max value, not additive) score += time_score_contrib # Coordination keyword combo check keyword_count = sum(1 for kw in COORDINATION_KEYWORDS if kw in text_lower) keyword_count = min(keyword_count, 3) combo_bonus = keyword_count * 0.08 score += min(combo_bonus, 0.24) # Name mention boost: if we have a name + any coordination signal if "name_mention" in matched_patterns and len(matched_patterns) > 1: score += 0.08 # boost for named family member in context result = TripwireResult( fired=score >= FIRE_THRESHOLD, score=round(score, 3), patterns_matched=matched_patterns, has_time_signal=any(p in matched_patterns for p in TIME_SIGNALS), has_coordination_keyword=keyword_count > 0, matched_text=text[:200], ) return result