Token tweak
This commit is contained in:
+6
-8
@@ -312,7 +312,6 @@ def main():
|
|||||||
parts.append({"text": msg["text"]})
|
parts.append({"text": msg["text"]})
|
||||||
api_contents.append({"role": msg["role"], "parts": parts})
|
api_contents.append({"role": msg["role"], "parts": parts})
|
||||||
|
|
||||||
# Add the current prompt
|
|
||||||
current_parts = []
|
current_parts = []
|
||||||
if not files_added_to_payload and not active_cache_id and file_objects:
|
if not files_added_to_payload and not active_cache_id and file_objects:
|
||||||
for f in file_objects:
|
for f in file_objects:
|
||||||
@@ -379,14 +378,13 @@ def main():
|
|||||||
output_tokens = usage_metadata.candidates_token_count or 0
|
output_tokens = usage_metadata.candidates_token_count or 0
|
||||||
cached_tokens = getattr(usage_metadata, 'cached_content_token_count', 0) or 0
|
cached_tokens = getattr(usage_metadata, 'cached_content_token_count', 0) or 0
|
||||||
|
|
||||||
# Handle API variations where prompt_token_count might strictly be the uncached tokens
|
# Revert to max(0) to handle Google's padding discrepancy where prompt_tokens < cached_tokens
|
||||||
if prompt_tokens >= cached_tokens:
|
uncached_tokens = max(0, prompt_tokens - cached_tokens)
|
||||||
uncached_tokens = prompt_tokens - cached_tokens
|
|
||||||
else:
|
|
||||||
uncached_tokens = prompt_tokens
|
|
||||||
|
|
||||||
total_input_tokens = uncached_tokens + cached_tokens
|
|
||||||
|
|
||||||
|
# Ensure the tier logic checks the absolute largest representation of the payload
|
||||||
|
total_input_tokens = max(prompt_tokens, cached_tokens)
|
||||||
|
|
||||||
|
# Fetch the rate dynamically from the parsed JSON or default to 0.0 if fetch failed
|
||||||
rates = pricing_data.get(args.model, {"input": 0.0, "cached": 0.0, "output": 0.0})
|
rates = pricing_data.get(args.model, {"input": 0.0, "cached": 0.0, "output": 0.0})
|
||||||
|
|
||||||
input_rate = rates.get("input", 0.0)
|
input_rate = rates.get("input", 0.0)
|
||||||
|
|||||||
Reference in New Issue
Block a user