Added the >200k tier. Fixed problems with using a non-existent model.
This commit is contained in:
+63
-32
@@ -23,16 +23,23 @@ def fetch_pricing_for_model(client, target_model):
|
|||||||
|
|
||||||
prompt = f"""
|
prompt = f"""
|
||||||
Extract the API pricing for the model '{target_model}' from the following HTML text.
|
Extract the API pricing for the model '{target_model}' from the following HTML text.
|
||||||
Find the cost per 1 million input tokens, 1 million cached content tokens, and 1 million output tokens.
|
Find the cost per 1 million tokens for input, cached content, and output.
|
||||||
Return ONLY a valid JSON object with this exact structure:
|
Many models have a split tier where the cost increases if the prompt exceeds 200k tokens.
|
||||||
|
|
||||||
|
CRITICAL: If the model '{target_model}' is definitively NOT found in the HTML data, return an empty JSON object: {{}}
|
||||||
|
|
||||||
|
Otherwise, return ONLY a valid JSON object with this exact structure:
|
||||||
{{
|
{{
|
||||||
"{target_model}": {{
|
"{target_model}": {{
|
||||||
"input": 0.00,
|
"input": 0.00,
|
||||||
"cached": 0.00,
|
"cached": 0.00,
|
||||||
"output": 0.00
|
"output": 0.00,
|
||||||
|
"input_over_200k": 0.00,
|
||||||
|
"cached_over_200k": 0.00,
|
||||||
|
"output_over_200k": 0.00
|
||||||
}}
|
}}
|
||||||
}}
|
}}
|
||||||
If a value is not found, use 0.00.
|
If a tier value is not found, duplicate the base tier values.
|
||||||
|
|
||||||
HTML DATA:
|
HTML DATA:
|
||||||
{html}
|
{html}
|
||||||
@@ -104,7 +111,7 @@ def main():
|
|||||||
with open(PRICING_FILE, "r") as f:
|
with open(PRICING_FILE, "r") as f:
|
||||||
pricing_data = json.load(f)
|
pricing_data = json.load(f)
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
print(f"Warning: {PRICING_FILE} is corrupted. Starting fresh.")
|
print(f"Warning: {PRICING_FILE} is corrupted. Starting fresh.", file=sys.stderr)
|
||||||
|
|
||||||
# Fetch pricing if forced, or if the model isn't currently tracked
|
# Fetch pricing if forced, or if the model isn't currently tracked
|
||||||
if args.pricing or args.model not in pricing_data:
|
if args.pricing or args.model not in pricing_data:
|
||||||
@@ -319,38 +326,49 @@ def main():
|
|||||||
|
|
||||||
print("Generating response (this may take a moment for large outputs)...\n")
|
print("Generating response (this may take a moment for large outputs)...\n")
|
||||||
|
|
||||||
response_stream = client.models.generate_content_stream(
|
|
||||||
model=args.model,
|
|
||||||
contents=api_contents,
|
|
||||||
config=config
|
|
||||||
)
|
|
||||||
|
|
||||||
full_response_text = ""
|
full_response_text = ""
|
||||||
usage_metadata = None
|
usage_metadata = None
|
||||||
finish_reason_str = "UNKNOWN"
|
finish_reason_str = "UNKNOWN"
|
||||||
|
|
||||||
if args.output:
|
try:
|
||||||
with open(args.output, "w") as f:
|
response_stream = client.models.generate_content_stream(
|
||||||
|
model=args.model,
|
||||||
|
contents=api_contents,
|
||||||
|
config=config
|
||||||
|
)
|
||||||
|
|
||||||
|
if args.output:
|
||||||
|
with open(args.output, "w") as f:
|
||||||
|
for chunk in response_stream:
|
||||||
|
if chunk.text:
|
||||||
|
f.write(chunk.text)
|
||||||
|
f.flush()
|
||||||
|
full_response_text += chunk.text
|
||||||
|
if chunk.usage_metadata:
|
||||||
|
usage_metadata = chunk.usage_metadata
|
||||||
|
if chunk.candidates and chunk.candidates[0].finish_reason:
|
||||||
|
finish_reason_str = chunk.candidates[0].finish_reason.name
|
||||||
|
print(f"Done! Raw output saved directly to {args.output}", file=sys.stderr)
|
||||||
|
else:
|
||||||
for chunk in response_stream:
|
for chunk in response_stream:
|
||||||
if chunk.text:
|
if chunk.text:
|
||||||
f.write(chunk.text)
|
print(chunk.text, end="", flush=True)
|
||||||
f.flush()
|
|
||||||
full_response_text += chunk.text
|
full_response_text += chunk.text
|
||||||
if chunk.usage_metadata:
|
if chunk.usage_metadata:
|
||||||
usage_metadata = chunk.usage_metadata
|
usage_metadata = chunk.usage_metadata
|
||||||
if chunk.candidates and chunk.candidates[0].finish_reason:
|
if chunk.candidates and chunk.candidates[0].finish_reason:
|
||||||
finish_reason_str = chunk.candidates[0].finish_reason.name
|
finish_reason_str = chunk.candidates[0].finish_reason.name
|
||||||
print(f"Done! Raw output saved directly to {args.output}")
|
print()
|
||||||
else:
|
|
||||||
for chunk in response_stream:
|
except Exception as e:
|
||||||
if chunk.text:
|
# Catch the 404 Model Not Found error (or other API failures) gracefully
|
||||||
print(chunk.text, end="", flush=True)
|
if "404" in str(e) and "NOT_FOUND" in str(e):
|
||||||
full_response_text += chunk.text
|
print(f"\n[Error] The model '{args.model}' does not exist or is not available.", file=sys.stderr)
|
||||||
if chunk.usage_metadata:
|
else:
|
||||||
usage_metadata = chunk.usage_metadata
|
print(f"\n[API Error] {e}", file=sys.stderr)
|
||||||
if chunk.candidates and chunk.candidates[0].finish_reason:
|
|
||||||
finish_reason_str = chunk.candidates[0].finish_reason.name
|
# Exit cleanly so we don't calculate costs or save bad history
|
||||||
print()
|
return
|
||||||
|
|
||||||
# ---------------------------------------------------------
|
# ---------------------------------------------------------
|
||||||
# USAGE AND COST CALCULATION
|
# USAGE AND COST CALCULATION
|
||||||
@@ -361,18 +379,31 @@ def main():
|
|||||||
cached_tokens = getattr(usage_metadata, 'cached_content_token_count', 0) or 0
|
cached_tokens = getattr(usage_metadata, 'cached_content_token_count', 0) or 0
|
||||||
|
|
||||||
uncached_tokens = max(0, prompt_tokens - cached_tokens)
|
uncached_tokens = max(0, prompt_tokens - cached_tokens)
|
||||||
|
rates = pricing_data.get(args.model, {})
|
||||||
|
|
||||||
# Fetch the rate dynamically from the parsed JSON or default to 0.0 if fetch failed
|
input_rate = rates.get("input", 0.0)
|
||||||
rates = pricing_data.get(args.model, {"input": 0.0, "cached": 0.0, "output": 0.0})
|
cached_rate = rates.get("cached", 0.0)
|
||||||
|
output_rate = rates.get("output", 0.0)
|
||||||
|
tier_label = "Base Tier"
|
||||||
|
|
||||||
input_cost = (uncached_tokens / 1_000_000) * rates["input"]
|
# Check if prompt exceeded 200k tokens to apply tier pricing
|
||||||
cached_cost = (cached_tokens / 1_000_000) * rates["cached"]
|
if prompt_tokens > 200_000 and "input_over_200k" in rates:
|
||||||
output_cost = (output_tokens / 1_000_000) * rates["output"]
|
if rates.get("input_over_200k", 0.0) > 0:
|
||||||
|
input_rate = rates["input_over_200k"]
|
||||||
|
tier_label = ">200k Tier"
|
||||||
|
if rates.get("cached_over_200k", 0.0) > 0:
|
||||||
|
cached_rate = rates["cached_over_200k"]
|
||||||
|
if rates.get("output_over_200k", 0.0) > 0:
|
||||||
|
output_rate = rates["output_over_200k"]
|
||||||
|
|
||||||
|
input_cost = (uncached_tokens / 1_000_000) * input_rate
|
||||||
|
cached_cost = (cached_tokens / 1_000_000) * cached_rate
|
||||||
|
output_cost = (output_tokens / 1_000_000) * output_rate
|
||||||
total_cost = input_cost + cached_cost + output_cost
|
total_cost = input_cost + cached_cost + output_cost
|
||||||
|
|
||||||
print("\n[--- Execution Summary ---]")
|
print("\n[--- Execution Summary ---]")
|
||||||
print(f"Finish Reason: {finish_reason_str}")
|
print(f"Finish Reason: {finish_reason_str}")
|
||||||
print(f"Token Usage: Input: {uncached_tokens:,} | Cached: {cached_tokens:,} | Output: {output_tokens:,}")
|
print(f"Token Usage: Input: {uncached_tokens:,} | Cached: {cached_tokens:,} | Output: {output_tokens:,} ({tier_label})")
|
||||||
print(f"Est. Cost: ${total_cost:.6f} (Model: {args.model})")
|
print(f"Est. Cost: ${total_cost:.6f} (Model: {args.model})")
|
||||||
|
|
||||||
context_data["history"].append({"role": "user", "text": prompt_text})
|
context_data["history"].append({"role": "user", "text": prompt_text})
|
||||||
|
|||||||
Reference in New Issue
Block a user