Is model choice the only free lunch in AI?

3 hours ago 2

# bench.py

import json, time, yaml, statistics as stats

from pydantic import BaseModel, ValidationError

from openai import OpenAI

# Every request goes through the local archgw endpoint (keys are handled by
# archgw), so the client itself only carries a placeholder API key.
client = OpenAI(api_key="n/a", base_url="http://localhost:12000/v1")

# Model names to benchmark; main() reports them in this order.
MODELS = [
    "arch.summarize.v1",
    "arch.reason.v1",
]

# YAML file that load_fixtures() reads the evaluation cases from.
FIXTURES = "evals_summarize.yaml"

# Expected output shape

class SummarizeOut(BaseModel):
    """Structure a model response must have to count as schema-valid.

    ``run_case`` embeds the JSON schema generated from this model in the
    system prompt and validates each parsed response against it.
    """

    # All three fields are required; none declares a default.
    title: str
    bullets: list[str]
    next_actions: list[str]

def load_fixtures(path):
    """Load the benchmark fixtures from a YAML file.

    Args:
        path: Path of a YAML document whose top-level mapping contains a
            ``fixtures`` key.

    Returns:
        Whatever is stored under the ``fixtures`` key (``main`` iterates
        over it and passes each item to ``run_case``).

    Raises:
        OSError: If the file cannot be opened.
        KeyError: If the document has no ``fixtures`` key.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # default, so fixtures with non-ASCII text load the same way everywhere.
    with open(path, "r", encoding="utf-8") as f:
        return yaml.safe_load(f)["fixtures"]

def must_contain(text: str, anchors: list[str]) -> bool:
    """Report whether every anchor occurs in *text*, ignoring case.

    Args:
        text: The text to search.
        anchors: Substrings that must all be present. An empty list is
            trivially satisfied.

    Returns:
        True if each anchor is a caseless substring of ``text``, else False.
    """
    # casefold() rather than lower(): it is the caseless-matching form and
    # also equates pairs such as "ß" / "SS" that lower() leaves distinct.
    haystack = text.casefold()
    return all(anchor.casefold() in haystack for anchor in anchors)

def schema_fmt(model: type[BaseModel]):
    """Build the ``response_format`` argument for a chat completion.

    ``model`` is accepted but not consulted: plain JSON-object mode is
    requested no matter which model class is passed in.
    """
    # Simplified for broad compatibility
    response_format = {"type": "json_object"}
    return response_format

def run_case(model, fx):
    """Send one fixture to one model and grade the response.

    Args:
        model: Model name passed to the chat-completions endpoint.
        fx: Fixture mapping. ``fx["input"]`` is sent as the user message;
            the optional ``fx["must_include"]`` lists anchor strings that
            must all appear in the response.

    Returns:
        A dict with three keys: ``ok`` (bool, True when every check passed),
        ``lat`` (request latency in seconds) and ``why`` (failure reasons
        joined with ``";"``; empty string on success, ``"json decode"`` when
        the response is not parseable JSON).
    """
    # Build the request before starting the clock so that the latency covers
    # only the round trip, not local schema generation and serialization.
    schema = SummarizeOut.model_json_schema()
    messages = [
        {
            "role": "system",
            "content": f"Be concise. Output valid JSON matching this schema:\n{json.dumps(schema)}",
        },
        {"role": "user", "content": fx["input"]},
    ]
    response_format = schema_fmt(SummarizeOut)

    t0 = time.perf_counter()
    resp = client.chat.completions.create(
        model=model,
        messages=messages,
        response_format=response_format,
    )
    dt = time.perf_counter() - t0

    # A missing/empty message body is graded as an empty JSON object.
    content = resp.choices[0].message.content or "{}"

    try:
        data = json.loads(content)
    except json.JSONDecodeError:
        # Only malformed JSON counts as a decode failure; anything else
        # (e.g. KeyboardInterrupt) must propagate.
        return {"ok": False, "lat": dt, "why": "json decode"}

    reasons = []

    try:
        # model_validate reports non-object JSON (list, number, ...) as a
        # ValidationError, whereas SummarizeOut(**data) would raise an
        # uncaught TypeError for it.
        SummarizeOut.model_validate(data)
    except ValidationError:
        reasons.append("schema")

    # A fixture may omit must_include or leave it null; both mean no anchors.
    anchors = fx.get("must_include") or []
    # ensure_ascii=False keeps non-ASCII characters literal; the default
    # would turn them into \uXXXX escapes that no anchor could ever match.
    if not must_contain(json.dumps(data, ensure_ascii=False), anchors):
        reasons.append("anchors")

    return {"ok": not reasons, "lat": dt, "why": ";".join(reasons)}

def main():
    """Benchmark every model in ``MODELS`` and print a per-model report.

    For each model, every fixture loaded from ``FIXTURES`` is run through
    ``run_case``; the success ratio and the average / 95th-percentile
    latency (in milliseconds) are then printed to stdout.
    """
    # A fixtures key with a null value is treated as "no fixtures".
    fixtures = load_fixtures(FIXTURES) or []

    for model in MODELS:
        results = [run_case(model, fx) for fx in fixtures]
        total = len(results)
        ok = sum(r["ok"] for r in results)
        latencies = [r["lat"] for r in results]

        print(f"\n››› {model}")

        if not total:
            # Nothing was run: a percentage would divide by zero and there
            # are no latencies to summarize.
            print(" Success: 0/0 (n/a)")
            continue

        print(f" Success: {ok}/{total} ({ok/total:.0%})")

        avg_lat = stats.mean(latencies)
        if len(latencies) >= 2:
            p95_lat = stats.quantiles(latencies, n=100)[94]
        else:
            # statistics.quantiles rejects a single data point on
            # Python < 3.13; with one sample the p95 is that sample.
            p95_lat = latencies[0]
        print(f" Latency (ms): avg={avg_lat*1000:.0f}, p95={p95_lat*1000:.0f}")

# Run the benchmark only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()

Read Entire Article