task-router.xiaoyan/scripts/route_request.py
2026-04-03 16:32:05 +08:00

315 lines
9.8 KiB
Python
Executable File

#!/usr/bin/env python3
import argparse
import json
import re
import sys
from dataclasses import dataclass
from typing import Dict, List
# The three routing targets a request can be classified into.
ROUTES = ("python_script", "high_compute_model", "low_compute_model")
@dataclass
class RouteScore:
    """Score assigned to one candidate route, with human-readable reasons."""

    # Route identifier; one of the names in ROUTES.
    name: str
    # Non-negative heuristic score; higher means a stronger match.
    score: int
    # Short explanations of which signals contributed to the score.
    reasons: List[str]
def build_execution_plan(route: str, text: str, confidence: float) -> Dict[str, object]:
    """Build the route-specific execution plan attached to a routing result.

    Args:
        route: One of the ROUTES names; any other value falls through to the
            low-compute plan.
        text: Raw request text; a whitespace-collapsed preview (truncated to
            140 chars) is embedded in the plan.
        confidence: Accepted for interface stability.
            NOTE(review): currently unused inside this function — confirm
            whether it should be surfaced in the plan.

    Returns:
        A dict describing how to execute the request on the chosen route,
        always ending with the "request_preview" key.
    """
    preview = " ".join(text.split())
    if len(preview) > 140:
        preview = preview[:137] + "..."
    # Dispatch table for the two explicit routes; everything else gets the
    # low-compute plan below.
    plans = {
        "python_script": {
            "execution_type": "run_python",
            "goal": "Handle the request with deterministic code execution.",
            "immediate_action": "Inspect the files/data involved, then write or run a focused Python script.",
            "codex_instruction": "Execute the task with Python first. Use the model only to design the script or explain the result.",
            "artifacts_to_produce": [
                "a Python script or one-off Python command",
                "structured output or generated files",
                "a concise summary of what was processed",
            ],
            "escalate_if": [
                "the script needs significant algorithm or architecture design",
                "requirements are ambiguous before coding can start",
            ],
        },
        "high_compute_model": {
            "execution_type": "run_high_compute_model",
            "goal": "Handle the request with deeper reasoning before taking action.",
            "immediate_action": "Use a stronger model to analyze the task, resolve ambiguity, and produce the answer or plan.",
            "codex_instruction": "Give the task to a stronger model path first. If execution is later needed, convert the resulting plan into code or commands.",
            "artifacts_to_produce": [
                "a detailed answer, design, or plan",
                "explicit tradeoffs, assumptions, or decision criteria",
            ],
            "escalate_if": [
                "the task becomes procedural after planning",
                "the answer requires file processing or repeatable transformations",
            ],
        },
    }
    plan = plans.get(route)
    if plan is None:
        plan = {
            "execution_type": "run_low_compute_model",
            "goal": "Handle the request with the cheapest viable language-model pass.",
            "immediate_action": "Use a lightweight model path for a fast first answer.",
            "codex_instruction": "Start with a cheaper/faster model. Escalate only if the output is weak, incomplete, or the task expands.",
            "artifacts_to_produce": [
                "a short answer or rewrite",
                "minimal reasoning with quick turnaround",
            ],
            "escalate_if": [
                "the request turns out to be ambiguous",
                "the first pass fails quality checks",
                "multiple retries would cost more than escalating once",
            ],
        }
    # Assigned last so the key order matches the original literal layout.
    plan["request_preview"] = preview
    return plan
def normalize(text: str) -> str:
    """Lowercase *text*, trim it, and collapse whitespace runs to one space."""
    # split()/join with no separator strips ends and squeezes interior runs
    # in a single pass.
    return " ".join(text.lower().split())
def keyword_hits(text: str, keywords: List[str]) -> List[str]:
    """Return the keywords that occur in *text*, preserving input order.

    NOTE(review): matching is plain substring containment, so e.g. "log"
    also hits "dialogue" — confirm this looseness is intended.
    """
    return [keyword for keyword in keywords if keyword in text]
def score_python_route(text: str) -> RouteScore:
    """Score how strongly *text* suggests deterministic Python execution.

    *text* is expected to be pre-normalized (lowercase, collapsed spaces).
    """
    deterministic_keywords = [
        "python", "script", "csv", "json", "yaml", "xml", "excel",
        "spreadsheet", "parse", "extract", "transform", "convert", "rename",
        "batch", "directory", "folder", "file", "files", "dataset", "log",
        "logs", "calculate", "count", "sort", "filter", "regex", "scrape",
    ]
    hits = [kw for kw in deterministic_keywords if kw in text]
    score = 0
    reasons: List[str] = []
    if hits:
        # Flat base weight plus a per-keyword bonus capped at 6.
        score += 4 + min(len(hits), 6)
        reasons.append(
            "deterministic data/file-processing signals: " + ", ".join(hits[:6])
        )
    automation_markers = ("automate", "repeatedly", "pipeline", "generate report")
    if any(marker in text for marker in automation_markers):
        score += 3
        reasons.append("request looks repetitive or automation-friendly")
    precision_markers = ("exact", "precise", "reproducible", "structured output")
    if any(marker in text for marker in precision_markers):
        score += 2
        reasons.append("request favors reproducible execution over free-form reasoning")
    return RouteScore("python_script", score, reasons)
def score_high_route(text: str) -> RouteScore:
    """Score how strongly *text* suggests deeper model reasoning is needed.

    *text* is expected to be pre-normalized (lowercase, collapsed spaces).
    """
    reasoning_keywords = [
        "analyze", "analysis", "design", "architect", "strategy", "compare",
        "tradeoff", "debug", "root cause", "plan", "complex", "hard",
        "unclear", "ambiguous", "research", "brainstorm", "proposal", "spec",
    ]
    hits = [kw for kw in reasoning_keywords if kw in text]
    score = 0
    reasons: List[str] = []
    if hits:
        # Flat base weight plus a per-keyword bonus capped at 6.
        score += 4 + min(len(hits), 6)
        reasons.append("open-ended reasoning signals: " + ", ".join(hits[:6]))
    depth_markers = ("step by step", "carefully", "deeply", "thoroughly", "rigorous")
    if any(marker in text for marker in depth_markers):
        score += 3
        reasons.append("user explicitly asks for deeper or more careful reasoning")
    # Long requests tend to carry more context worth reasoning over.
    if len(text.split()) > 80:
        score += 2
        reasons.append("request is long enough to suggest higher-context reasoning")
    return RouteScore("high_compute_model", score, reasons)
def score_low_route(text: str) -> RouteScore:
    """Score how strongly *text* suggests a cheap, lightweight model pass.

    *text* is expected to be pre-normalized (lowercase, collapsed spaces).
    """
    lightweight_keywords = [
        "rewrite", "rephrase", "translate", "summarize", "summary",
        "classify", "tag", "format", "clean up", "fix grammar",
        "short answer", "quick", "simple",
    ]
    hits = [kw for kw in lightweight_keywords if kw in text]
    score = 0
    reasons: List[str] = []
    if hits:
        # Flat base weight plus a per-keyword bonus capped at 5.
        score += 4 + min(len(hits), 5)
        reasons.append(
            "lightweight language-task signals: " + ", ".join(hits[:6])
        )
    # Very short requests are usually cheap to answer.
    if len(text.split()) <= 25:
        score += 2
        reasons.append("request is short and likely cheap to answer")
    cost_markers = ("cheap", "fast", "brief")
    if any(marker in text for marker in cost_markers):
        score += 2
        reasons.append("user is optimizing for speed or lower cost")
    return RouteScore("low_compute_model", score, reasons)
def choose_route(text: str) -> Dict[str, object]:
    """Classify *text* into one of ROUTES and attach an execution plan.

    Returns a dict with the chosen route, a confidence in [0.25, 0.95],
    the winning route's reasons, all per-route scores, a recommended next
    action, and the route's execution plan.
    """
    normalized = normalize(text)
    if not normalized:
        # Nothing to score: default to the cheapest route at low confidence.
        return {
            "route": "low_compute_model",
            "confidence": 0.25,
            "reasons": ["empty request defaults to the lowest-cost model"],
            "scores": dict.fromkeys(ROUTES, 0),
            "execution_plan": build_execution_plan("low_compute_model", text, 0.25),
        }
    candidates = [
        score_python_route(normalized),
        score_high_route(normalized),
        score_low_route(normalized),
    ]
    # Stable sort keeps the python/high/low priority order on score ties.
    ranked = sorted(candidates, key=lambda candidate: candidate.score, reverse=True)
    winner, runner_up = ranked[0], ranked[1]
    if winner.score == 0:
        # No signals anywhere: prefer the stronger model over guessing cheap.
        winner = RouteScore(
            "high_compute_model",
            1,
            ["fallback to the stronger model because the task is not obviously deterministic or trivial"],
        )
        runner_up = RouteScore("low_compute_model", 0, [])
    # Confidence grows with the winner's margin, clamped to [0.55, 0.95].
    margin = max(winner.score - runner_up.score, 0)
    confidence = round(min(0.55 + 0.1 * margin, 0.95), 2)
    next_actions = {
        "python_script": "Prefer executing or writing a Python script first, then use a model only for glue logic or explanation.",
        "high_compute_model": "Prefer a stronger model for planning, ambiguity resolution, or multi-step reasoning.",
        "low_compute_model": "Prefer a cheaper/faster model for the first pass and escalate only if it struggles.",
    }
    return {
        "route": winner.name,
        "confidence": confidence,
        "reasons": winner.reasons,
        "scores": {candidate.name: candidate.score for candidate in ranked},
        "recommended_next_action": next_actions[winner.name],
        "execution_plan": build_execution_plan(winner.name, text, confidence),
    }
def main() -> int:
    """CLI entry point: classify request text and print the routing result.

    Reads the request from --text or, when omitted, from stdin. Output is
    JSON by default, pretty JSON with --pretty, or a short human-readable
    summary with --summary (which takes precedence over --pretty).
    Always returns exit status 0.
    """
    parser = argparse.ArgumentParser(
        description="Route a request to python_script, high_compute_model, or low_compute_model."
    )
    parser.add_argument("--text", help="Request text to classify. If omitted, read from stdin.")
    parser.add_argument("--pretty", action="store_true", help="Pretty-print JSON output.")
    parser.add_argument(
        "--summary",
        action="store_true",
        help="Print a compact human-readable routing summary instead of JSON.",
    )
    args = parser.parse_args()
    text = sys.stdin.read() if args.text is None else args.text
    result = choose_route(text)
    if args.summary:
        print(f"Route: {result['route']}")
        print("Why: " + "; ".join(result["reasons"][:2]))
        print("Next step: " + result["execution_plan"]["immediate_action"])
    elif args.pretty:
        print(json.dumps(result, indent=2, ensure_ascii=True))
    else:
        print(json.dumps(result, ensure_ascii=True))
    return 0
# Script entry point: the process exit status is main()'s return value.
if __name__ == "__main__":
    raise SystemExit(main())