"""URL fetching with Jina Reader API."""

import re
import os
import sys


# An http(s) scheme followed by everything up to whitespace, an angle bracket,
# or a quote character.
_URL_PATTERN = re.compile(r'https?://[^\s<>"\']+')

# Characters that are always treated as sentence punctuation when they end a
# matched URL.
_TRAILING_PUNCTUATION = '.,;:>'

# Closing brackets mapped to their openers. A trailing closer is removed only
# when the URL itself does not contain a matching opener.
_BRACKET_PAIRS = {')': '(', ']': '['}


def _trim_url_tail(url: str) -> str:
    """Remove trailing sentence punctuation and unbalanced closers from *url*."""
    while url:
        last = url[-1]
        if last in _TRAILING_PUNCTUATION:
            url = url[:-1]
        elif last in _BRACKET_PAIRS and url.count(last) > url.count(_BRACKET_PAIRS[last]):
            # More closers than openers: this one belongs to the surrounding
            # text, e.g. "(see https://example.com/x)".
            url = url[:-1]
        else:
            break
    return url


def extract_urls(text: str) -> list:
    """Extract HTTP(S) URLs from text.

    Args:
        text: Text to scan. ``None`` or an empty string yields an empty list.

    Returns:
        The matched URLs in order of appearance (duplicates are kept), with
        trailing ``. , ; : >`` removed. A trailing ``)`` or ``]`` is removed
        only when it is unbalanced within the URL, so links such as
        ``https://en.wikipedia.org/wiki/Python_(language)`` stay intact.
    """
    if not text:
        return []
    return [_trim_url_tail(match) for match in _URL_PATTERN.findall(text)]


def fetch_url_as_markdown(url: str, timeout: int = 30) -> str:
    """Retrieve a page through the Jina Reader proxy and return its body.

    The target ``url`` is appended to ``https://r.jina.ai/`` and requested
    with an ``Accept: text/markdown`` header. The response bytes are decoded
    as UTF-8, with undecodable bytes replaced.

    Args:
        url: Address of the page to fetch.
        timeout: Value passed as ``timeout`` to ``urlopen``.

    Returns:
        The decoded response body on success. On any failure no exception is
        raised; instead a string of the form ``"[Error fetching URL: ...]"``
        is returned.
    """
    from urllib import error as url_error
    from urllib import request as url_request

    request_headers = {
        "Accept": "text/markdown",
        "User-Agent": "Mozilla/5.0 (compatible; HoffDesk/1.0)"
    }
    reader_request = url_request.Request(
        f"https://r.jina.ai/{url}",
        headers=request_headers,
    )

    try:
        with url_request.urlopen(reader_request, timeout=timeout) as response:
            payload = response.read()
            return payload.decode('utf-8', errors='replace')
    # HTTPError subclasses URLError, so it has to be handled first.
    except url_error.HTTPError as exc:
        return f"[Error fetching URL: HTTP {exc.code}]"
    except url_error.URLError as exc:
        return f"[Error fetching URL: {exc.reason}]"
    except Exception as exc:
        return f"[Error fetching URL: {exc}]"


def enrich_body_with_urls(body: str, max_urls: int = 3) -> str:
    """Extract URLs from body, fetch content, append to body.

    For newsletter-style emails with just a URL link (like Smore).

    Args:
        body: Message text to scan for links.
        max_urls: Maximum number of distinct URLs to attempt to fetch.
            Values of zero or below disable fetching.

    Returns:
        ``body`` followed by one ``---``-separated section per successfully
        fetched URL. ``body`` is returned unchanged when it contains no URLs
        or when every fetch fails.
    """
    urls = extract_urls(body)
    if not urls:
        return body

    # The same link often appears more than once in an email; fetch each
    # distinct URL only once, keeping first-seen order.
    unique_urls = list(dict.fromkeys(urls))

    # Clamp so a negative limit means "fetch nothing" rather than slicing
    # from the end of the list.
    limit = max(max_urls, 0)

    sections = [body]
    for url in unique_urls[:limit]:
        content = fetch_url_as_markdown(url)
        # fetch_url_as_markdown reports failures as a "[Error ..." string
        # instead of raising, so such results (and empty bodies) are skipped.
        if content and not content.startswith("[Error"):
            sections.append(f"\n\n---\nFetched from {url}:\n{content}")

    return "".join(sections)