Fixed pricing searches. Still not working for >200k tokens.

2026-06-04 17:00:43 -07:00
parent 9c2dd68a28
commit 5c588cbe4f
1 changed file with 21 additions and 10 deletions
@@ -45,12 +45,18 @@ def fetch_pricing_for_model(client, target_model):
    
    print("Parsing pricing data via background AI session...")
    try:
+        # We use a fast, cheap model just for the parsing task
        res = client.models.generate_content(
            model="gemini-3.1-flash-lite", 
            contents=prompt,
            config=config
        )
        new_data = json.loads(res.text)
+        
+        # Strip the array wrapper if the AI returned a list instead of a pure dict
+        if isinstance(new_data, list) and len(new_data) > 0:
+            new_data = new_data[0]
+            
        print(f"Successfully retrieved pricing for {target_model}.")
        print(new_data)
        return new_data
@@ -96,21 +102,24 @@ def main():
    if os.path.exists(PRICING_FILE):
        try:
            with open(PRICING_FILE, "r") as f:
-                file_pricing = json.load(f)
-                pricing_data.update(file_pricing)
+                pricing_data = json.load(f)
        except json.JSONDecodeError:
-            print(f"Warning: {PRICING_FILE} is corrupted. Using defaults.", file=sys.stderr)
+            print(f"Warning: {PRICING_FILE} is corrupted. Starting fresh.")

+    # Fetch pricing if forced, or if the model isn't currently tracked
    if args.pricing or args.model not in pricing_data:
        new_pricing = fetch_pricing_for_model(client, args.model)
-        if new_pricing:
+        
+        if new_pricing and args.model in new_pricing:
            pricing_data.update(new_pricing)
            with open(PRICING_FILE, "w") as f:
                json.dump(pricing_data, f, indent=4)
-        elif not os.path.exists(PRICING_FILE):
-            with open(PRICING_FILE, "w") as f:
-                json.dump(pricing_data, f, indent=4)
-                
+        else:
+            print(f"Warning: Could not fetch pricing for {args.model}. Estimating cost at $0.00.", file=sys.stderr)
+            # Add a fallback zero-value so the script doesn't crash during cost calculation
+            pricing_data[args.model] = {"input": 0.0, "cached": 0.0, "output": 0.0}
+            
+        # If the user only requested a pricing update and nothing else, exit cleanly
        if args.pricing and not prompt_text and not args.files and not args.destroy and not args.clear_history:
            return
            
@@ -308,7 +317,7 @@ def main():

            config = types.GenerateContentConfig(**config_kwargs)

-            print("Generating response (this may take a moment for large outputs)...")
+            print("Generating response (this may take a moment for large outputs)...\n")
            
            response_stream = client.models.generate_content_stream(
                model=args.model,
@@ -352,7 +361,9 @@ def main():
                cached_tokens = getattr(usage_metadata, 'cached_content_token_count', 0) or 0
                
                uncached_tokens = max(0, prompt_tokens - cached_tokens)
-                rates = pricing_data.get(args.model, pricing_data.get("default"))
+                
+                # Fetch the rate dynamically from the parsed JSON or default to 0.0 if fetch failed
+                rates = pricing_data.get(args.model, {"input": 0.0, "cached": 0.0, "output": 0.0})
                
                input_cost = (uncached_tokens / 1_000_000) * rates["input"]
                cached_cost = (cached_tokens / 1_000_000) * rates["cached"]