#!/usr/bin/env python3
|
|
import argparse
|
|
import json
|
|
import re
|
|
import sys
|
|
from dataclasses import dataclass
|
|
from typing import Dict, List
|
|
|
|
|
|
# Canonical route identifiers, in the order used for score reporting.
ROUTES = ("python_script", "high_compute_model", "low_compute_model")
@dataclass
class RouteScore:
    """Score assigned to one candidate route, with human-readable reasons."""

    # Route identifier; expected to be one of the entries in ROUTES.
    name: str
    # Accumulated heuristic score; the highest-scoring route wins.
    score: int
    # One short explanation per scoring contribution (may be empty).
    reasons: List[str]
def build_execution_plan(route: str, text: str, confidence: float) -> Dict[str, object]:
    """Build the execution-plan payload for a chosen route.

    Args:
        route: Route identifier; anything other than ``python_script`` or
            ``high_compute_model`` falls back to the low-compute plan.
        text: Raw request text; a whitespace-collapsed preview (truncated to
            140 characters) is embedded in the plan.
        confidence: Accepted for interface parity with callers.
            NOTE(review): this value is not embedded in the plan — confirm
            whether that is intentional.

    Returns:
        A JSON-serializable dict describing how to execute the request.
    """
    snippet = " ".join(text.strip().split())
    if len(snippet) > 140:
        snippet = snippet[:137] + "..."

    # Static plan bodies, keyed by route; the preview is attached last so it
    # stays the final key, as in the original hand-written dicts.
    plans: Dict[str, Dict[str, object]] = {
        "python_script": {
            "execution_type": "run_python",
            "goal": "Handle the request with deterministic code execution.",
            "immediate_action": "Inspect the files/data involved, then write or run a focused Python script.",
            "codex_instruction": "Execute the task with Python first. Use the model only to design the script or explain the result.",
            "artifacts_to_produce": [
                "a Python script or one-off Python command",
                "structured output or generated files",
                "a concise summary of what was processed",
            ],
            "escalate_if": [
                "the script needs significant algorithm or architecture design",
                "requirements are ambiguous before coding can start",
            ],
        },
        "high_compute_model": {
            "execution_type": "run_high_compute_model",
            "goal": "Handle the request with deeper reasoning before taking action.",
            "immediate_action": "Use a stronger model to analyze the task, resolve ambiguity, and produce the answer or plan.",
            "codex_instruction": "Give the task to a stronger model path first. If execution is later needed, convert the resulting plan into code or commands.",
            "artifacts_to_produce": [
                "a detailed answer, design, or plan",
                "explicit tradeoffs, assumptions, or decision criteria",
            ],
            "escalate_if": [
                "the task becomes procedural after planning",
                "the answer requires file processing or repeatable transformations",
            ],
        },
    }
    fallback: Dict[str, object] = {
        "execution_type": "run_low_compute_model",
        "goal": "Handle the request with the cheapest viable language-model pass.",
        "immediate_action": "Use a lightweight model path for a fast first answer.",
        "codex_instruction": "Start with a cheaper/faster model. Escalate only if the output is weak, incomplete, or the task expands.",
        "artifacts_to_produce": [
            "a short answer or rewrite",
            "minimal reasoning with quick turnaround",
        ],
        "escalate_if": [
            "the request turns out to be ambiguous",
            "the first pass fails quality checks",
            "multiple retries would cost more than escalating once",
        ],
    }

    plan = dict(plans.get(route, fallback))
    plan["request_preview"] = snippet
    return plan
def normalize(text: str) -> str:
    """Lower-case *text*, trim it, and collapse whitespace runs to one space."""
    return re.sub(r"\s+", " ", text.lower().strip())
def keyword_hits(text: str, keywords: List[str]) -> List[str]:
    """Return the keywords that occur as substrings of *text*, in input order."""
    return [keyword for keyword in keywords if keyword in text]
def score_python_route(text: str) -> RouteScore:
    """Score how strongly *text* matches deterministic Python-script work."""
    evidence: List[str] = []
    points = 0

    data_terms = [
        "python", "script", "csv", "json", "yaml", "xml", "excel",
        "spreadsheet", "parse", "extract", "transform", "convert", "rename",
        "batch", "directory", "folder", "file", "files", "dataset", "log",
        "logs", "calculate", "count", "sort", "filter", "regex", "scrape",
    ]
    found = keyword_hits(text, data_terms)
    if found:
        # Base weight plus one point per matched term, capped at six.
        points += 4 + min(len(found), 6)
        evidence.append(
            "deterministic data/file-processing signals: " + ", ".join(found[:6])
        )

    automation_markers = ("automate", "repeatedly", "pipeline", "generate report")
    if any(marker in text for marker in automation_markers):
        points += 3
        evidence.append("request looks repetitive or automation-friendly")

    precision_markers = ("exact", "precise", "reproducible", "structured output")
    if any(marker in text for marker in precision_markers):
        points += 2
        evidence.append("request favors reproducible execution over free-form reasoning")

    return RouteScore("python_script", points, evidence)
def score_high_route(text: str) -> RouteScore:
    """Score how strongly *text* calls for the high-compute reasoning route."""
    evidence: List[str] = []
    points = 0

    reasoning_terms = [
        "analyze", "analysis", "design", "architect", "strategy", "compare",
        "tradeoff", "debug", "root cause", "plan", "complex", "hard",
        "unclear", "ambiguous", "research", "brainstorm", "proposal", "spec",
    ]
    found = keyword_hits(text, reasoning_terms)
    if found:
        # Base weight plus one point per matched term, capped at six.
        points += 4 + min(len(found), 6)
        evidence.append("open-ended reasoning signals: " + ", ".join(found[:6]))

    depth_markers = ("step by step", "carefully", "deeply", "thoroughly", "rigorous")
    if any(marker in text for marker in depth_markers):
        points += 3
        evidence.append("user explicitly asks for deeper or more careful reasoning")

    # Long requests tend to need more context and deliberation.
    if len(text.split()) > 80:
        points += 2
        evidence.append("request is long enough to suggest higher-context reasoning")

    return RouteScore("high_compute_model", points, evidence)
def score_low_route(text: str) -> RouteScore:
    """Score how strongly *text* matches cheap, lightweight language work."""
    evidence: List[str] = []
    points = 0

    lightweight_terms = [
        "rewrite", "rephrase", "translate", "summarize", "summary",
        "classify", "tag", "format", "clean up", "fix grammar",
        "short answer", "quick", "simple",
    ]
    found = keyword_hits(text, lightweight_terms)
    if found:
        # Base weight plus one point per matched term, capped at five.
        points += 4 + min(len(found), 5)
        evidence.append(
            "lightweight language-task signals: " + ", ".join(found[:6])
        )

    # Very short requests are usually cheap to answer.
    if len(text.split()) <= 25:
        points += 2
        evidence.append("request is short and likely cheap to answer")

    speed_markers = ("cheap", "fast", "brief")
    if any(marker in text for marker in speed_markers):
        points += 2
        evidence.append("user is optimizing for speed or lower cost")

    return RouteScore("low_compute_model", points, evidence)
def choose_route(text: str) -> Dict[str, object]:
    """Classify *text* into one of ROUTES and assemble the routing payload.

    Args:
        text: Raw request text (may be empty or whitespace-only).

    Returns:
        A dict with keys ``route``, ``confidence``, ``reasons``, ``scores``,
        ``recommended_next_action``, and ``execution_plan``.
    """
    # Hoisted so both return paths share one source of truth.
    recommended_next_actions = {
        "python_script": "Prefer executing or writing a Python script first, then use a model only for glue logic or explanation.",
        "high_compute_model": "Prefer a stronger model for planning, ambiguity resolution, or multi-step reasoning.",
        "low_compute_model": "Prefer a cheaper/faster model for the first pass and escalate only if it struggles.",
    }

    normalized = normalize(text)
    if not normalized:
        # Empty input: cheapest route at a fixed low confidence.
        # Fix: include recommended_next_action here too, so the output schema
        # matches the non-empty path (it was previously missing).
        return {
            "route": "low_compute_model",
            "confidence": 0.25,
            "reasons": ["empty request defaults to the lowest-cost model"],
            "scores": {route: 0 for route in ROUTES},
            "recommended_next_action": recommended_next_actions["low_compute_model"],
            "execution_plan": build_execution_plan("low_compute_model", text, 0.25),
        }

    scored_routes = [
        score_python_route(normalized),
        score_high_route(normalized),
        score_low_route(normalized),
    ]
    scored_routes.sort(key=lambda item: item.score, reverse=True)

    winner = scored_routes[0]
    runner_up = scored_routes[1]

    if winner.score == 0:
        # No heuristic fired at all: prefer the stronger model over guessing.
        winner = RouteScore(
            "high_compute_model",
            1,
            ["fallback to the stronger model because the task is not obviously deterministic or trivial"],
        )
        runner_up = RouteScore("low_compute_model", 0, [])

    # Confidence grows with the winner's margin, clamped to [0.55, 0.95].
    margin = max(winner.score - runner_up.score, 0)
    confidence = round(min(0.55 + 0.1 * margin, 0.95), 2)

    return {
        "route": winner.name,
        "confidence": confidence,
        "reasons": winner.reasons,
        "scores": {item.name: item.score for item in scored_routes},
        "recommended_next_action": recommended_next_actions[winner.name],
        "execution_plan": build_execution_plan(winner.name, text, confidence),
    }
def main() -> int:
    """CLI entry point: read the request text, route it, print the result.

    Returns:
        Process exit code (always 0).
    """
    parser = argparse.ArgumentParser(
        description="Route a request to python_script, high_compute_model, or low_compute_model."
    )
    parser.add_argument("--text", help="Request text to classify. If omitted, read from stdin.")
    parser.add_argument("--pretty", action="store_true", help="Pretty-print JSON output.")
    parser.add_argument(
        "--summary",
        action="store_true",
        help="Print a compact human-readable routing summary instead of JSON.",
    )
    args = parser.parse_args()

    # Fall back to stdin when --text is absent (empty string still counts as given).
    request_text = sys.stdin.read() if args.text is None else args.text
    result = choose_route(request_text)

    if args.summary:
        print(f"Route: {result['route']}")
        print("Why: " + "; ".join(result["reasons"][:2]))
        print("Next step: " + result["execution_plan"]["immediate_action"])
        return 0

    # indent=None yields the same compact output as omitting the argument.
    print(json.dumps(result, indent=2 if args.pretty else None, ensure_ascii=True))
    return 0
if __name__ == "__main__":
    # Script entry point; SystemExit carries main()'s return code to the shell.
    raise SystemExit(main())