Token tweak

2026-06-04 17:52:37 -07:00
parent 7e6a2c782b
commit 0f44ea375d
1 changed file with 6 additions and 8 deletions
@@ -312,7 +312,6 @@ def main():
                parts.append({"text": msg["text"]})
                api_contents.append({"role": msg["role"], "parts": parts})

-            # Add the current prompt
            current_parts = []
            if not files_added_to_payload and not active_cache_id and file_objects:
                for f in file_objects:
@@ -379,14 +378,13 @@ def main():
                output_tokens = usage_metadata.candidates_token_count or 0
                cached_tokens = getattr(usage_metadata, 'cached_content_token_count', 0) or 0
                
-                # Handle API variations where prompt_token_count might strictly be the uncached tokens
-                if prompt_tokens >= cached_tokens:
-                    uncached_tokens = prompt_tokens - cached_tokens
-                else:
-                    uncached_tokens = prompt_tokens
-                    
-                total_input_tokens = uncached_tokens + cached_tokens
+                # Clamp at zero so uncached_tokens never goes negative when the API reports prompt_tokens < cached_tokens (Google's padding discrepancy)
+                uncached_tokens = max(0, prompt_tokens - cached_tokens)
                
+                # Use the larger of the two counts as the total input size, so the tier logic sees the full payload even when prompt_tokens < cached_tokens
+                total_input_tokens = max(prompt_tokens, cached_tokens)
+                
+                # Look up this model's rates in the parsed pricing JSON; fall back to zero rates if the model has no entry (e.g. the pricing fetch failed)
                rates = pricing_data.get(args.model, {"input": 0.0, "cached": 0.0, "output": 0.0})
                
                input_rate = rates.get("input", 0.0)