task-router.xiaoyan/scripts/route_request.py
2026-04-03 16:32:05 +08:00

315 lines
9.8 KiB
Python
Executable File

#!/usr/bin/env python3
import argparse
import json
import re
import sys
from dataclasses import dataclass
from typing import Dict, List
# The three routing targets a request can be classified into.
ROUTES = ("python_script", "high_compute_model", "low_compute_model")
@dataclass
class RouteScore:
    """Score assigned to one candidate route, with human-readable reasons."""

    # Route identifier; one of the names in ROUTES.
    name: str
    # Non-negative heuristic score; higher means a stronger match.
    score: int
    # Short explanations of which signals contributed to the score.
    reasons: List[str]
def build_execution_plan(route: str, text: str, confidence: float) -> Dict[str, object]:
    """Build the route-specific execution plan attached to a routing result.

    Args:
        route: One of the ROUTES names; any other value falls through to the
            low-compute plan.
        text: Raw request text; a whitespace-collapsed preview (truncated to
            140 chars) is embedded in the plan.
        confidence: Accepted for interface stability.
            NOTE(review): currently unused inside this function — confirm
            whether it should be surfaced in the plan.

    Returns:
        A dict describing how to execute the request on the chosen route,
        always ending with the "request_preview" key.
    """
    preview = " ".join(text.split())
    if len(preview) > 140:
        preview = preview[:137] + "..."
    # Dispatch table for the two explicit routes; everything else gets the
    # low-compute plan below.
    plans = {
        "python_script": {
            "execution_type": "run_python",
            "goal": "Handle the request with deterministic code execution.",
            "immediate_action": "Inspect the files/data involved, then write or run a focused Python script.",
            "codex_instruction": "Execute the task with Python first. Use the model only to design the script or explain the result.",
            "artifacts_to_produce": [
                "a Python script or one-off Python command",
                "structured output or generated files",
                "a concise summary of what was processed",
            ],
            "escalate_if": [
                "the script needs significant algorithm or architecture design",
                "requirements are ambiguous before coding can start",
            ],
        },
        "high_compute_model": {
            "execution_type": "run_high_compute_model",
            "goal": "Handle the request with deeper reasoning before taking action.",
            "immediate_action": "Use a stronger model to analyze the task, resolve ambiguity, and produce the answer or plan.",
            "codex_instruction": "Give the task to a stronger model path first. If execution is later needed, convert the resulting plan into code or commands.",
            "artifacts_to_produce": [
                "a detailed answer, design, or plan",
                "explicit tradeoffs, assumptions, or decision criteria",
            ],
            "escalate_if": [
                "the task becomes procedural after planning",
                "the answer requires file processing or repeatable transformations",
            ],
        },
    }
    plan = plans.get(route)
    if plan is None:
        plan = {
            "execution_type": "run_low_compute_model",
            "goal": "Handle the request with the cheapest viable language-model pass.",
            "immediate_action": "Use a lightweight model path for a fast first answer.",
            "codex_instruction": "Start with a cheaper/faster model. Escalate only if the output is weak, incomplete, or the task expands.",
            "artifacts_to_produce": [
                "a short answer or rewrite",
                "minimal reasoning with quick turnaround",
            ],
            "escalate_if": [
                "the request turns out to be ambiguous",
                "the first pass fails quality checks",
                "multiple retries would cost more than escalating once",
            ],
        }
    # Assigned last so the key order matches the original literal layout.
    plan["request_preview"] = preview
    return plan
def normalize(text: str) -> str:
    """Lowercase *text*, trim it, and collapse whitespace runs to one space."""
    # split()/join with no separator strips ends and squeezes interior runs
    # in a single pass.
    return " ".join(text.lower().split())
def keyword_hits(text: str, keywords: List[str]) -> List[str]:
    """Return the keywords that occur in *text*, preserving input order.

    NOTE(review): matching is plain substring containment, so e.g. "log"
    also hits "dialogue" — confirm this looseness is intended.
    """
    return [keyword for keyword in keywords if keyword in text]
def score_python_route(text: str) -> RouteScore:
    """Score how strongly *text* suggests deterministic Python execution.

    *text* is expected to be pre-normalized (lowercase, collapsed spaces).
    """
    deterministic_keywords = [
        "python", "script", "csv", "json", "yaml", "xml", "excel",
        "spreadsheet", "parse", "extract", "transform", "convert", "rename",
        "batch", "directory", "folder", "file", "files", "dataset", "log",
        "logs", "calculate", "count", "sort", "filter", "regex", "scrape",
    ]
    hits = [kw for kw in deterministic_keywords if kw in text]
    score = 0
    reasons: List[str] = []
    if hits:
        # Flat base weight plus a per-keyword bonus capped at 6.
        score += 4 + min(len(hits), 6)
        reasons.append(
            "deterministic data/file-processing signals: " + ", ".join(hits[:6])
        )
    automation_markers = ("automate", "repeatedly", "pipeline", "generate report")
    if any(marker in text for marker in automation_markers):
        score += 3
        reasons.append("request looks repetitive or automation-friendly")
    precision_markers = ("exact", "precise", "reproducible", "structured output")
    if any(marker in text for marker in precision_markers):
        score += 2
        reasons.append("request favors reproducible execution over free-form reasoning")
    return RouteScore("python_script", score, reasons)
def score_high_route(text: str) -> RouteScore:
    """Score how strongly *text* suggests deeper model reasoning is needed.

    *text* is expected to be pre-normalized (lowercase, collapsed spaces).
    """
    reasoning_keywords = [
        "analyze", "analysis", "design", "architect", "strategy", "compare",
        "tradeoff", "debug", "root cause", "plan", "complex", "hard",
        "unclear", "ambiguous", "research", "brainstorm", "proposal", "spec",
    ]
    hits = [kw for kw in reasoning_keywords if kw in text]
    score = 0
    reasons: List[str] = []
    if hits:
        # Flat base weight plus a per-keyword bonus capped at 6.
        score += 4 + min(len(hits), 6)
        reasons.append("open-ended reasoning signals: " + ", ".join(hits[:6]))
    depth_markers = ("step by step", "carefully", "deeply", "thoroughly", "rigorous")
    if any(marker in text for marker in depth_markers):
        score += 3
        reasons.append("user explicitly asks for deeper or more careful reasoning")
    # Long requests tend to carry more context worth reasoning over.
    if len(text.split()) > 80:
        score += 2
        reasons.append("request is long enough to suggest higher-context reasoning")
    return RouteScore("high_compute_model", score, reasons)
def score_low_route(text: str) -> RouteScore:
    """Score how strongly *text* suggests a cheap, lightweight model pass.

    *text* is expected to be pre-normalized (lowercase, collapsed spaces).
    """
    lightweight_keywords = [
        "rewrite", "rephrase", "translate", "summarize", "summary",
        "classify", "tag", "format", "clean up", "fix grammar",
        "short answer", "quick", "simple",
    ]
    hits = [kw for kw in lightweight_keywords if kw in text]
    score = 0
    reasons: List[str] = []
    if hits:
        # Flat base weight plus a per-keyword bonus capped at 5.
        score += 4 + min(len(hits), 5)
        reasons.append(
            "lightweight language-task signals: " + ", ".join(hits[:6])
        )
    # Very short requests are usually cheap to answer.
    if len(text.split()) <= 25:
        score += 2
        reasons.append("request is short and likely cheap to answer")
    cost_markers = ("cheap", "fast", "brief")
    if any(marker in text for marker in cost_markers):
        score += 2
        reasons.append("user is optimizing for speed or lower cost")
    return RouteScore("low_compute_model", score, reasons)
def choose_route(text: str) -> Dict[str, object]:
    """Classify *text* into one of ROUTES and attach an execution plan.

    Returns a dict with the chosen route, a confidence in [0.25, 0.95],
    the winning route's reasons, all per-route scores, a recommended next
    action, and the route's execution plan.
    """
    normalized = normalize(text)
    if not normalized:
        # Nothing to score: default to the cheapest route at low confidence.
        return {
            "route": "low_compute_model",
            "confidence": 0.25,
            "reasons": ["empty request defaults to the lowest-cost model"],
            "scores": dict.fromkeys(ROUTES, 0),
            "execution_plan": build_execution_plan("low_compute_model", text, 0.25),
        }
    candidates = [
        score_python_route(normalized),
        score_high_route(normalized),
        score_low_route(normalized),
    ]
    # Stable sort keeps the python/high/low priority order on score ties.
    ranked = sorted(candidates, key=lambda candidate: candidate.score, reverse=True)
    winner, runner_up = ranked[0], ranked[1]
    if winner.score == 0:
        # No signals anywhere: prefer the stronger model over guessing cheap.
        winner = RouteScore(
            "high_compute_model",
            1,
            ["fallback to the stronger model because the task is not obviously deterministic or trivial"],
        )
        runner_up = RouteScore("low_compute_model", 0, [])
    # Confidence grows with the winner's margin, clamped to [0.55, 0.95].
    margin = max(winner.score - runner_up.score, 0)
    confidence = round(min(0.55 + 0.1 * margin, 0.95), 2)
    next_actions = {
        "python_script": "Prefer executing or writing a Python script first, then use a model only for glue logic or explanation.",
        "high_compute_model": "Prefer a stronger model for planning, ambiguity resolution, or multi-step reasoning.",
        "low_compute_model": "Prefer a cheaper/faster model for the first pass and escalate only if it struggles.",
    }
    return {
        "route": winner.name,
        "confidence": confidence,
        "reasons": winner.reasons,
        "scores": {candidate.name: candidate.score for candidate in ranked},
        "recommended_next_action": next_actions[winner.name],
        "execution_plan": build_execution_plan(winner.name, text, confidence),
    }
def main() -> int:
    """CLI entry point: classify request text and print the routing result.

    Reads the request from --text or, when omitted, from stdin. Output is
    JSON by default, pretty JSON with --pretty, or a short human-readable
    summary with --summary (which takes precedence over --pretty).
    Always returns exit status 0.
    """
    parser = argparse.ArgumentParser(
        description="Route a request to python_script, high_compute_model, or low_compute_model."
    )
    parser.add_argument("--text", help="Request text to classify. If omitted, read from stdin.")
    parser.add_argument("--pretty", action="store_true", help="Pretty-print JSON output.")
    parser.add_argument(
        "--summary",
        action="store_true",
        help="Print a compact human-readable routing summary instead of JSON.",
    )
    args = parser.parse_args()
    text = sys.stdin.read() if args.text is None else args.text
    result = choose_route(text)
    if args.summary:
        print(f"Route: {result['route']}")
        print("Why: " + "; ".join(result["reasons"][:2]))
        print("Next step: " + result["execution_plan"]["immediate_action"])
    elif args.pretty:
        print(json.dumps(result, indent=2, ensure_ascii=True))
    else:
        print(json.dumps(result, ensure_ascii=True))
    return 0
# Script entry point: the process exit status is main()'s return value.
if __name__ == "__main__":
    raise SystemExit(main())