# word_count.py (4,383 bytes, Apr 23, 2026)

"""
Word Count Enforcement — Ensure generated content meets target length.

Strategies:
1. Pre-prompt: Tell model target word count
2. Post-generation: Check count, regenerate if too short
3. Expand: Ask model to add detail to specific sections

Usage:
    from word_count import enforce_word_count

    # Check and expand if needed
    expanded = await enforce_word_count(
        content=generated_text,
        target=1200,
        model="phi4:14b",
        min_threshold=0.8,  # 80% of target
    )

"""

import asyncio
from typing import Optional

# Import pipeline for regeneration

import sys
sys.path.insert(0, '/home/hoffmann_admin/.openclaw/workspace-socrates/hoffdesk-api')
from content.pipeline import generate_ollama

# Prompt template sent to the model when a post is below its minimum length.
# It is filled with str.format() using three placeholders:
#   {current_words} - word count of the text being expanded
#   {target_words}  - the target word count
#   {content}       - the text to expand
# Any literal braces added to this template must be doubled ({{ }}) so that
# str.format() does not treat them as placeholders.
EXPANSION_PROMPT = """The following blog post is too short ({current_words} words, target is {target_words}).

Your task: EXPAND EXISTING SECTIONS with more technical detail.

RULES:
- ONLY expand sections that already exist (Attempts 1, 2, 3, Fix, Reflection)
- DO NOT add new sections or new failed attempts
- Add exact commands with flags and output
- Describe error messages as they appeared on screen
- Include internal monologue about why each attempt failed
- Expand the reflection with specific lessons learned
- Add more detail to the Fix section (exact commands, output)

DO NOT invent new attempts, new errors, or new systems.
DO NOT add fluff or filler. Only real technical detail.

IMPORTANT: NEVER use specific dates, times, days of week, or years. Use vague time references like "around that time" or "eventually".

ORIGINAL:
{content}

EXPANDED VERSION:"""

async def enforce_word_count(
    content: str,
    target: int,
    model: str = "phi4:14b",
    min_threshold: float = 0.8,
    max_attempts: int = 2,
) -> str:
    """Return *content*, asking the model to expand it if it is too short.

    Words are counted by splitting on whitespace. If the count is already at
    least ``int(target * min_threshold)`` the content is returned unchanged
    and the model is never called. Otherwise the text is sent to
    ``generate_ollama`` with ``EXPANSION_PROMPT`` up to ``max_attempts``
    times. The first response that reaches the minimum is returned.

    A response that is not longer than the best version seen so far (for
    example an empty or truncated reply) is discarded, so the function never
    returns less text than it was given.

    Args:
        content: Generated content to check.
        target: Target word count.
        model: Model name passed to ``generate_ollama`` for expansion.
        min_threshold: Minimum acceptable ratio (e.g. 0.8 = 80% of target).
        max_attempts: Maximum number of expansion attempts.

    Returns:
        Content meeting the minimum word count, or the longest version
        obtained if the minimum could not be reached.
    """
    best = content
    best_words = len(best.split())
    min_words = int(target * min_threshold)

    if best_words >= min_words:
        return best

    print(f"Word count: {best_words}/{target} (below {min_threshold*100:.0f}% threshold)")

    for attempt in range(max_attempts):
        print(f"Expansion attempt {attempt + 1}/{max_attempts}...")

        prompt = EXPANSION_PROMPT.format(
            current_words=best_words,
            target_words=target,
            content=best,
        )

        try:
            expanded = await generate_ollama(
                model=model,
                prompt=prompt,
                temperature=0.6,
                max_tokens=4000,
                timeout=180.0,
            )
            # Counting stays inside the try: a non-string reply (e.g. None)
            # is treated as a failed expansion rather than crashing.
            new_count = len(expanded.split())
        except Exception as e:
            # Best-effort: any generation failure ends the retries and falls
            # through to returning the best version obtained so far.
            print(f"  Expansion failed: {e}")
            break

        print(f"  Expanded to {new_count} words")

        if new_count >= min_words:
            return expanded

        # Only adopt the new text when it is an improvement; a shorter reply
        # must not replace the longer version we already hold.
        if new_count > best_words:
            best, best_words = expanded, new_count

    print(f"Warning: Could not reach target. Returning {best_words} words.")
    return best

def add_word_count_to_prompt(
    prompt: str, target: int, min_threshold: float = 0.8
) -> str:
    """Append a length-requirement section to a generation prompt.

    Args:
        prompt: The original generation prompt.
        target: Target word count quoted in the requirement.
        min_threshold: Ratio of ``target`` quoted as the minimum word count
            (truncated to an int). Defaults to 0.8, matching the default
            threshold of ``enforce_word_count``.

    Returns:
        ``prompt`` followed by the length-requirement block.
    """
    minimum = int(target * min_threshold)
    return prompt + f"""

=== LENGTH REQUIREMENT ===
Target: {target} words (minimum {minimum} words)
Current estimate: ___ words

You MUST write at least {target} words. Include:
- Specific commands with exact flags
- Error messages as they appeared
- Internal thought process
- At least 3 failed attempts with detail
- Honest reflection with lessons learned

If you write fewer than {target} words, the content will be rejected.
"""

# Test

if __name__ == "__main__":
    # Manual smoke test: runs only when the file is executed as a script,
    # never on import. It exercises the prompt injection only, so no model
    # call is made.
    short_text = "It was late. I fixed the server. The end."

    print(f"Original: {len(short_text.split())} words")

    # Just test the prompt injection
    prompt = "Write a blog post about DNS."
    enhanced = add_word_count_to_prompt(prompt, 1200)
    print("\nEnhanced prompt (last 200 chars):")
    print(enhanced[-200:])