Fixed pricing searches. Still not working for >200k tokens.

This commit is contained in:
2026-06-04 17:00:43 -07:00
parent 9c2dd68a28
commit 5c588cbe4f
+20 -9
View File
@@ -45,12 +45,18 @@ def fetch_pricing_for_model(client, target_model):
print("Parsing pricing data via background AI session...") print("Parsing pricing data via background AI session...")
try: try:
# We use a fast, cheap model just for the parsing task
res = client.models.generate_content( res = client.models.generate_content(
model="gemini-3.1-flash-lite", model="gemini-3.1-flash-lite",
contents=prompt, contents=prompt,
config=config config=config
) )
new_data = json.loads(res.text) new_data = json.loads(res.text)
# Strip the array wrapper if the AI returned a list instead of a pure dict
if isinstance(new_data, list) and len(new_data) > 0:
new_data = new_data[0]
print(f"Successfully retrieved pricing for {target_model}.") print(f"Successfully retrieved pricing for {target_model}.")
print(new_data) print(new_data)
return new_data return new_data
@@ -96,21 +102,24 @@ def main():
if os.path.exists(PRICING_FILE): if os.path.exists(PRICING_FILE):
try: try:
with open(PRICING_FILE, "r") as f: with open(PRICING_FILE, "r") as f:
file_pricing = json.load(f) pricing_data = json.load(f)
pricing_data.update(file_pricing)
except json.JSONDecodeError: except json.JSONDecodeError:
print(f"Warning: {PRICING_FILE} is corrupted. Using defaults.", file=sys.stderr) print(f"Warning: {PRICING_FILE} is corrupted. Starting fresh.")
# Fetch pricing if forced, or if the model isn't currently tracked
if args.pricing or args.model not in pricing_data: if args.pricing or args.model not in pricing_data:
new_pricing = fetch_pricing_for_model(client, args.model) new_pricing = fetch_pricing_for_model(client, args.model)
if new_pricing:
if new_pricing and args.model in new_pricing:
pricing_data.update(new_pricing) pricing_data.update(new_pricing)
with open(PRICING_FILE, "w") as f: with open(PRICING_FILE, "w") as f:
json.dump(pricing_data, f, indent=4) json.dump(pricing_data, f, indent=4)
elif not os.path.exists(PRICING_FILE): else:
with open(PRICING_FILE, "w") as f: print(f"Warning: Could not fetch pricing for {args.model}. Estimating cost at $0.00.", file=sys.stderr)
json.dump(pricing_data, f, indent=4) # Add a fallback zero-value so the script doesn't crash during cost calculation
pricing_data[args.model] = {"input": 0.0, "cached": 0.0, "output": 0.0}
# If the user only requested a pricing update and nothing else, exit cleanly
if args.pricing and not prompt_text and not args.files and not args.destroy and not args.clear_history: if args.pricing and not prompt_text and not args.files and not args.destroy and not args.clear_history:
return return
@@ -308,7 +317,7 @@ def main():
config = types.GenerateContentConfig(**config_kwargs) config = types.GenerateContentConfig(**config_kwargs)
print("Generating response (this may take a moment for large outputs)...") print("Generating response (this may take a moment for large outputs)...\n")
response_stream = client.models.generate_content_stream( response_stream = client.models.generate_content_stream(
model=args.model, model=args.model,
@@ -352,7 +361,9 @@ def main():
cached_tokens = getattr(usage_metadata, 'cached_content_token_count', 0) or 0 cached_tokens = getattr(usage_metadata, 'cached_content_token_count', 0) or 0
uncached_tokens = max(0, prompt_tokens - cached_tokens) uncached_tokens = max(0, prompt_tokens - cached_tokens)
rates = pricing_data.get(args.model, pricing_data.get("default"))
# Fetch the rate dynamically from the parsed JSON or default to 0.0 if fetch failed
rates = pricing_data.get(args.model, {"input": 0.0, "cached": 0.0, "output": 0.0})
input_cost = (uncached_tokens / 1_000_000) * rates["input"] input_cost = (uncached_tokens / 1_000_000) * rates["input"]
cached_cost = (cached_tokens / 1_000_000) * rates["cached"] cached_cost = (cached_tokens / 1_000_000) * rates["cached"]