# tripwire.py

"""Tripwire — sub-10ms regex signal detection for coordination signals.

Tier 1 of the two-tier extraction pipeline.
Fires on messages containing coordination intent, time references,
family logistics, or event signals.

Output: TripwireResult with score, matched patterns, and a flag for
whether any coordination keyword was found.

Design (v2.1 — 2026-05-08 calibration):
- Time signals: capped at max() to prevent Chronometer Spam
- Base patterns: 0.08–0.25 per match
- Family member presence: boosts coordination signal
- Coordination keywords: 0.08 each (cap 0.24)
- Fire threshold: 0.35 (lowered from 0.4 for better recall)
"""

import re
from dataclasses import dataclass, field
from typing import Optional

# ---------------------------------------------------------------------------
# Patterns
# ---------------------------------------------------------------------------

# Compiled regexes keyed by signal name. run_tripwire() searches each one
# against the lowercased message text and looks up the weight for a match
# in PATTERN_SCORES under the same key.
PATTERNS = {
    # Time/schedule indicators
    # Clock times ("3:30", "3:30pm", "3:30 pm", "6pm", "6 pm") or a part of
    # the day. Whitespace before the AM/PM marker is optional (\s*), so a
    # bare "3:30" and a glued "6pm" both match.
    "time_specific": re.compile(
        r"\b(\d{1,2}:\d{2}\s*(AM|PM|am|pm)?|\d{1,2}\s*(AM|PM|am|pm))\b"
        r"|\b(morning|afternoon|evening|tonight)\b"
    ),
    # Bare hour ("at 6", "by 3", "around 5") — no colon or AM/PM needed
    "hour_only": re.compile(
        r"\b(at|by|around|before|after)\s+(\d{1,2})\b"
    ),
    "day_reference": re.compile(
        r"\b(today|tomorrow|tonight|yesterday|"
        r"monday|tuesday|wednesday|thursday|friday|saturday|sunday)\b"
    ),
    "vague_time": re.compile(r"\b(this week|next week|this weekend|next weekend)\b"),
    # Numeric dates ("5/12", "5-12") or a month name followed by a day number
    "date_reference": re.compile(
        r"\b(\d{1,2}/\d{1,2}|\d{1,2}-\d{1,2}|"
        r"\b(january|february|march|april|may|june|july|august|"
        r"september|october|november|december)\s+\d{1,2})\b"
    ),

    # High-signal coordination keywords (single pattern, higher weight)
    "pickup_dropoff": re.compile(r"\b(pick ?up|drop ?off)\b"),
    "can_you": re.compile(r"\b(can you|could you|would you|will you|are you able to)\b"),
    "need_someone": re.compile(r"\b(need someone|need a|looking for|anyone available|who can)\b"),
    "coverage_offer": re.compile(
        r"\b(i can (take|grab|do)|i(?:'ll| will) (take|grab|do)|"
        r"let me (take|grab|do)|i could take|i'll handle)\b"
    ),
    "swap_change": re.compile(r"\b(swap|switch|reschedule)\b"),

    # Appointment / medical / school events — high signal
    "appointment": re.compile(
        r"\b(appointment|checkup|appt|visit|screening|consultation|procedure)\b"
    ),
    # IGNORECASE so the upper-case abbreviations OT/PT still match the
    # lowercased text that run_tripwire() searches.
    "medical": re.compile(
        r"\b(doctor|dentist|pediatrician|orthodontist|"
        r"therapy|OT|PT|speech|eye|vision|hearing)\b",
        flags=re.IGNORECASE,
    ),
    "school_event": re.compile(
        r"\b(conference|parent-teacher|school|practice|game|match|"
        r"recital|concert|performance|ceremony|party|field trip)\b"
    ),
    "lesson_class": re.compile(
        r"\b(lesson|class|tutor|dance|swim|soccer|baseball|"
        r"basketball|gymnastics|karate|piano)\b"
    ),

    # Family name mentions — boost when combined with other signals
    "name_mention": re.compile(
        r"\b(Sully|Sullivan|Harper|Aundrea|Matt|Maggie)\b",
        flags=re.IGNORECASE,
    ),
    # Any possessive word followed by an event noun ("harper's recital")
    "possessive_name": re.compile(
        r"\b(\w+'s)\s+(appointment|practice|game|dance|lesson|party|"
        r"checkup|visit|haircut|recital|concert)\b"
    ),

    # Coordination intent
    "implicit_assign": re.compile(
        r"\b(is going (with|to)|are going (with|to)|"
        r"going with (you|me|us)|has to (go|be|get)|needs to (go|be|get))\b"
    ),
    "suggestion_intent": re.compile(
        r"\b(we should|let's|how about|i was thinking|"
        r"i'd like to|we could|we need to|we have to)\b"
    ),

    # Event recall / confirmation
    "event_recall": re.compile(
        r"\b(remember we have|don't forget (we|that)|"
        r"are we still on for|looking forward to)\b"
    ),
    "event_ack": re.compile(
        r"\b(yes, i have|i'll be there|we're going to|see you at)\b"
    ),

    # Meal/social — lower signal
    "meal_meeting": re.compile(
        r"\b(breakfast|lunch|brunch|dinner|drinks|coffee|"
        r"meet up|hang out|get together)\b"
    ),

    # Offer/help — lower signal
    "offer_help": re.compile(
        r"\b(do you want me to|should i|i'll take|i'll get|"
        r"let me know|i could take)\b"
    ),

    # Location — moderate signal
    "location": re.compile(
        r"\b(hospital|vet|clinic|office|gym|field|pool|"
        r"church|library|park|school)\b"
    ),
}

# Pattern names treated as time signals; run_tripwire() counts only the
# largest matching weight among these instead of summing them.
TIME_SIGNALS = {
    "time_specific",
    "hour_only",
    "day_reference",
    "date_reference",
    "vague_time",
}

# Base scores per pattern — calibrated v2.1

# Weight added to the tripwire score when the pattern of the same name matches.
PATTERN_SCORES = {
    # Time signals
    "time_specific": 0.15,
    "hour_only": 0.10,
    "day_reference": 0.12,
    "date_reference": 0.12,
    "vague_time": 0.08,
    # High-signal coordination phrases
    "pickup_dropoff": 0.25,
    "can_you": 0.25,
    "need_someone": 0.25,
    "coverage_offer": 0.20,
    "swap_change": 0.20,
    # Appointment / medical / school / lessons
    "appointment": 0.15,
    "medical": 0.12,
    "school_event": 0.12,
    "lesson_class": 0.10,
    # Names
    "name_mention": 0.08,
    "possessive_name": 0.12,
    # Coordination intent
    "implicit_assign": 0.20,
    "suggestion_intent": 0.12,
    # Event recall / confirmation
    "event_recall": 0.20,
    "event_ack": 0.12,
    # Lower-signal context
    "meal_meeting": 0.08,
    "offer_help": 0.12,
    "location": 0.08,
}

# Coordination keywords for combo bonus

# Phrases counted for the keyword combo bonus in run_tripwire().
COORDINATION_KEYWORDS = [
    # Direct requests
    "can you",
    "could you",
    "would you",
    "will you",
    # Logistics
    "need someone",
    "pick up",
    "drop off",
    # Coverage
    "cover",
    "watch",
    "stay with",
    "babysit",
    "fill in",
    # Schedule changes
    "swap",
    "switch",
    "reschedule",
    # Open asks
    "who can",
]

# v2.1 — lowered from 0.4 for better recall on borderline messages
FIRE_THRESHOLD = 0.35

@dataclass
class TripwireResult:
    """Outcome of one tripwire pass over a message.

    All fields have defaults, so ``TripwireResult()`` is the "nothing
    matched" result.
    """

    # True when the score reached the fire threshold.
    fired: bool = False
    # Accumulated signal score for the message.
    score: float = 0.0
    # Names of the patterns that matched, in the order they were checked.
    patterns_matched: list[str] = field(default_factory=list)
    # True when at least one matched pattern is a time signal.
    has_time_signal: bool = False
    # True when at least one coordination keyword was found.
    has_coordination_keyword: bool = False
    # Leading excerpt of the scanned message.
    matched_text: str = ""

def run_tripwire(text: str) -> TripwireResult:
    """Run the regex tripwire against ``text`` and score it.

    Scoring, in order:

    1. Every entry in ``PATTERNS`` is searched against the lowercased text.
       Non-time matches add their ``PATTERN_SCORES`` weight (0.10 when a
       pattern has no entry); time-signal matches contribute only the single
       largest weight among them.
    2. Each ``COORDINATION_KEYWORDS`` entry found adds 0.08, capped at 0.24.
       A keyword must start at a word boundary, so "cover" matches
       "covering" but not "discover".
    3. A ``name_mention`` match alongside at least one other matched pattern
       adds a further 0.08.

    Args:
        text: Raw message text.

    Returns:
        A ``TripwireResult``. ``fired`` is true when the score, rounded to
        three decimals, is at least ``FIRE_THRESHOLD``; ``matched_text`` is
        the first 200 characters of the original (not lowercased) input.
    """
    lowered = text.lower()
    matched_patterns = []
    score = 0.0
    time_score_contrib = 0.0

    for name, pattern in PATTERNS.items():
        if not pattern.search(lowered):
            continue
        matched_patterns.append(name)
        base = PATTERN_SCORES.get(name, 0.10)
        if name in TIME_SIGNALS:
            # Time signals are not additive: keep only the strongest one so
            # a message full of time words cannot fire on its own.
            time_score_contrib = max(time_score_contrib, base)
        else:
            score += base

    score += time_score_contrib

    # Keyword combo bonus. Anchoring at a word start keeps inflected forms
    # ("covering", "swapped") but rejects mid-word hits such as "discover"
    # or "stopwatch" that a plain substring test would count.
    keyword_count = sum(
        1
        for kw in COORDINATION_KEYWORDS
        if re.search(r"\b" + re.escape(kw), lowered)
    )
    score += min(keyword_count * 0.08, 0.24)

    # Name boost: a family name plus at least one other matched pattern of
    # any kind (not only coordination patterns).
    if "name_mention" in matched_patterns and len(matched_patterns) > 1:
        score += 0.08

    # Compare the rounded value so `fired` always agrees with the reported
    # `score`, regardless of float accumulation error in the sum above.
    final_score = round(score, 3)

    return TripwireResult(
        fired=final_score >= FIRE_THRESHOLD,
        score=final_score,
        patterns_matched=matched_patterns,
        has_time_signal=any(name in TIME_SIGNALS for name in matched_patterns),
        has_coordination_keyword=keyword_count > 0,
        matched_text=text[:200],
    )