diff --git a/gemini/gemini.py b/gemini/gemini.py index 1b4c008..5a74d21 100644 --- a/gemini/gemini.py +++ b/gemini/gemini.py @@ -312,7 +312,6 @@ def main(): parts.append({"text": msg["text"]}) api_contents.append({"role": msg["role"], "parts": parts}) - # Add the current prompt current_parts = [] if not files_added_to_payload and not active_cache_id and file_objects: for f in file_objects: @@ -379,14 +378,13 @@ def main(): output_tokens = usage_metadata.candidates_token_count or 0 cached_tokens = getattr(usage_metadata, 'cached_content_token_count', 0) or 0 - # Handle API variations where prompt_token_count might strictly be the uncached tokens - if prompt_tokens >= cached_tokens: - uncached_tokens = prompt_tokens - cached_tokens - else: - uncached_tokens = prompt_tokens - - total_input_tokens = uncached_tokens + cached_tokens + # Revert to max(0) to handle Google's padding discrepancy where prompt_tokens < cached_tokens + uncached_tokens = max(0, prompt_tokens - cached_tokens) + # Ensure the tier logic checks the absolute largest representation of the payload + total_input_tokens = max(prompt_tokens, cached_tokens) + + # Fetch the rate dynamically from the parsed JSON or default to 0.0 if fetch failed rates = pricing_data.get(args.model, {"input": 0.0, "cached": 0.0, "output": 0.0}) input_rate = rates.get("input", 0.0)