Fixed pricing searches. Pricing still does not work for >200k tokens.
This commit is contained in:
+21
-10
@@ -45,12 +45,18 @@ def fetch_pricing_for_model(client, target_model):
|
||||
|
||||
print("Parsing pricing data via background AI session...")
|
||||
try:
|
||||
# We use a fast, cheap model just for the parsing task
|
||||
res = client.models.generate_content(
|
||||
model="gemini-3.1-flash-lite",
|
||||
contents=prompt,
|
||||
config=config
|
||||
)
|
||||
new_data = json.loads(res.text)
|
||||
|
||||
# Strip the array wrapper if the AI returned a list instead of a pure dict
|
||||
if isinstance(new_data, list) and len(new_data) > 0:
|
||||
new_data = new_data[0]
|
||||
|
||||
print(f"Successfully retrieved pricing for {target_model}.")
|
||||
print(new_data)
|
||||
return new_data
|
||||
@@ -96,21 +102,24 @@ def main():
|
||||
if os.path.exists(PRICING_FILE):
|
||||
try:
|
||||
with open(PRICING_FILE, "r") as f:
|
||||
file_pricing = json.load(f)
|
||||
pricing_data.update(file_pricing)
|
||||
pricing_data = json.load(f)
|
||||
except json.JSONDecodeError:
|
||||
print(f"Warning: {PRICING_FILE} is corrupted. Using defaults.", file=sys.stderr)
|
||||
print(f"Warning: {PRICING_FILE} is corrupted. Starting fresh.")
|
||||
|
||||
# Fetch pricing if forced, or if the model isn't currently tracked
|
||||
if args.pricing or args.model not in pricing_data:
|
||||
new_pricing = fetch_pricing_for_model(client, args.model)
|
||||
if new_pricing:
|
||||
|
||||
if new_pricing and args.model in new_pricing:
|
||||
pricing_data.update(new_pricing)
|
||||
with open(PRICING_FILE, "w") as f:
|
||||
json.dump(pricing_data, f, indent=4)
|
||||
elif not os.path.exists(PRICING_FILE):
|
||||
with open(PRICING_FILE, "w") as f:
|
||||
json.dump(pricing_data, f, indent=4)
|
||||
|
||||
else:
|
||||
print(f"Warning: Could not fetch pricing for {args.model}. Estimating cost at $0.00.", file=sys.stderr)
|
||||
# Add a fallback zero-value so the script doesn't crash during cost calculation
|
||||
pricing_data[args.model] = {"input": 0.0, "cached": 0.0, "output": 0.0}
|
||||
|
||||
# If the user only requested a pricing update and nothing else, exit cleanly
|
||||
if args.pricing and not prompt_text and not args.files and not args.destroy and not args.clear_history:
|
||||
return
|
||||
|
||||
@@ -308,7 +317,7 @@ def main():
|
||||
|
||||
config = types.GenerateContentConfig(**config_kwargs)
|
||||
|
||||
print("Generating response (this may take a moment for large outputs)...")
|
||||
print("Generating response (this may take a moment for large outputs)...\n")
|
||||
|
||||
response_stream = client.models.generate_content_stream(
|
||||
model=args.model,
|
||||
@@ -352,7 +361,9 @@ def main():
|
||||
cached_tokens = getattr(usage_metadata, 'cached_content_token_count', 0) or 0
|
||||
|
||||
uncached_tokens = max(0, prompt_tokens - cached_tokens)
|
||||
rates = pricing_data.get(args.model, pricing_data.get("default"))
|
||||
|
||||
# Fetch the rate dynamically from the parsed JSON or default to 0.0 if fetch failed
|
||||
rates = pricing_data.get(args.model, {"input": 0.0, "cached": 0.0, "output": 0.0})
|
||||
|
||||
input_cost = (uncached_tokens / 1_000_000) * rates["input"]
|
||||
cached_cost = (cached_tokens / 1_000_000) * rates["cached"]
|
||||
|
||||
Reference in New Issue
Block a user