Files
monorepo/gemini/gemini.py
T

238 lines
11 KiB
Python

#!/usr/bin/env python3
import argparse
import json
import os
import sys
from google import genai
from google.genai import types
# Lifetime requested for a context cache, both at creation and on every reuse.
_CACHE_TTL = "3600s"

# System prompt shared by the cached and the uncached generation paths.
_SYSTEM_INSTRUCTION = (
    "You are a hybrid data extraction tool. If a specific format or file format output is requested "
    "(e.g., CSV), then output exactly what was requested and never use markdown formatting blocks (like ```csv). "
    "If a specific format was requested, never include conversational text, greetings, or explanations; "
    "and output raw data only. If not specific data format is suggested, you can answer with conversational text."
)


def _build_parser():
    """Return the argument parser describing the command-line interface."""
    parser = argparse.ArgumentParser(description="Gemini API CLI with File & Context Caching")
    parser.add_argument("-c", "--context", type=str, default=None,
                        help="Path to context file. If omitted, files/caches are deleted after execution.")
    parser.add_argument("-f", "--files", nargs="+", default=[],
                        help="Files to upload to the Gemini API")
    parser.add_argument("-m", "--model", type=str, default="gemini-3.1-flash-lite",
                        help="The model to use (default: gemini-3.1-flash-lite)")
    parser.add_argument("-d", "--destroy", action="store_true",
                        help="Destroy the cloud files and cache, and delete the local context file")
    parser.add_argument("-o", "--output", type=str,
                        help="Direct the raw output to a specific file instead of stdout")
    parser.add_argument("-p", "--prompt", type=str,
                        help="The prompt to send to the AI")
    parser.add_argument("positional_prompt", nargs=argparse.REMAINDER,
                        help="Positional arguments treated as the prompt if -p is omitted")
    return parser


def _load_context(path):
    """Return the context stored at *path*, or a fresh one if it is absent or unusable.

    The context is a dict with "file_ids" (uploaded file names) and "caches"
    (per-model mapping to {"cache_id", "cached_file_ids"}).
    """
    context_data = {"file_ids": [], "caches": {}}
    if not path or not os.path.exists(path):
        return context_data
    try:
        with open(path, "r", encoding="utf-8") as f:
            loaded_data = json.load(f)
    except (json.JSONDecodeError, UnicodeDecodeError):
        print(f"Warning: Could not parse {path}. Starting fresh.", file=sys.stderr)
        return context_data
    # Valid JSON that is not an object (e.g. a list) cannot hold the context.
    if not isinstance(loaded_data, dict):
        print(f"Warning: Could not parse {path}. Starting fresh.", file=sys.stderr)
        return context_data
    context_data["file_ids"] = loaded_data.get("file_ids", [])
    context_data["caches"] = loaded_data.get("caches", {})
    # Handle migration from old format
    if loaded_data.get("cache_id"):
        print("Warning: Old context format detected. Please use -d to destroy and start fresh, or unexpected cache behavior may occur.", file=sys.stderr)
    return context_data


def _save_context(path, context_data):
    """Write *context_data* to *path* as JSON; do nothing when no path is given."""
    if not path:
        return
    with open(path, "w", encoding="utf-8") as f:
        json.dump(context_data, f, indent=4)


def _delete_remote_resources(client, context_data):
    """Best-effort deletion of every cache and uploaded file listed in *context_data*.

    Failures are reported on stderr and do not stop the remaining deletions.
    """
    for model_name, cache_info in context_data.get("caches", {}).items():
        try:
            client.caches.delete(name=cache_info["cache_id"])
            print(f"Deleted cache for {model_name}: {cache_info['cache_id']}")
        except Exception as e:
            print(f"Warning: Failed to delete cache for {model_name}. {e}", file=sys.stderr)
    for file_id in context_data.get("file_ids", []):
        try:
            client.files.delete(name=file_id)
            print(f"Deleted file: {file_id}")
        except Exception as e:
            print(f"Warning: Failed to delete file '{file_id}'. {e}", file=sys.stderr)


def _upload_files(client, file_paths, context_data):
    """Upload each existing path and record the returned file name in *context_data*."""
    for file_path in file_paths:
        if not os.path.exists(file_path):
            print(f"Warning: File '{file_path}' not found. Skipping.", file=sys.stderr)
            continue
        print(f"Uploading '{file_path}'...")
        uploaded_file = client.files.upload(file=file_path)
        print(f"Success: '{file_path}' uploaded as '{uploaded_file.name}'")
        if uploaded_file.name not in context_data["file_ids"]:
            context_data["file_ids"].append(uploaded_file.name)


def _ensure_cache(client, model, context_data, file_objects, context_path):
    """Return the name of a usable context cache for *model*, or None.

    A stored cache is reused (and its TTL extended) when it covers exactly the
    current file list. Otherwise, or when the stored cache can no longer be
    refreshed, a new cache is created. None means the service rejected the
    content as too small to cache and the caller should send the files inline.

    Raises:
        Exception: any cache-creation error other than the "too small" case.
    """
    cache_info = context_data["caches"].get(model)
    if cache_info:
        cache_id = cache_info.get("cache_id")
        same_files = set(cache_info.get("cached_file_ids", [])) == set(context_data["file_ids"])
        if same_files and cache_id:
            print(f"Loading existing cache for {model}: {cache_id}")
            print("Extending cache TTL by 60 minutes...")
            try:
                client.caches.update(
                    name=cache_id,
                    config=types.UpdateCachedContentConfig(ttl=_CACHE_TTL),
                )
                return cache_id
            except Exception as e:
                # A cache that cannot be refreshed (typically because it has
                # already expired) must not be handed to generation; rebuild.
                # No delete is attempted here: if the cache does still exist
                # it lapses on its own TTL.
                print(f"Warning: Failed to update cache TTL. {e}", file=sys.stderr)
                print(f"Rebuilding {model} cache...", file=sys.stderr)
        else:
            print(f"File list changed. Destroying stale {model} cache to rebuild...")
            try:
                client.caches.delete(name=cache_id)
            except Exception as e:
                print(f"Warning: Could not delete stale cache. {e}", file=sys.stderr)
        # Forget the unusable entry right away so a failed rebuild below does
        # not leave the context pointing at a cache that is gone.
        context_data["caches"].pop(model, None)
        _save_context(context_path, context_data)

    print(f"Attempting to create Context Cache for {model}...")
    try:
        cache = client.caches.create(
            model=model,
            config=types.CreateCachedContentConfig(
                contents=file_objects,
                system_instruction=_SYSTEM_INSTRUCTION,
                ttl=_CACHE_TTL,
            ),
        )
    except Exception as e:
        # The rejection is recognised by message text only.
        if "too small" in str(e).lower() or "1024" in str(e):
            print("Notice: Files are too small for server-side caching (under 1024 tokens). Falling back to standard processing.")
            return None
        raise
    context_data["caches"][model] = {
        "cache_id": cache.name,
        "cached_file_ids": list(context_data["file_ids"]),
    }
    _save_context(context_path, context_data)
    print(f"Context Cache created: {cache.name}")
    return cache.name


def _generate(client, model, prompt_text, file_objects, cache_id, output_path):
    """Stream the model's answer to *output_path*, or to stdout when it is not set."""
    config_kwargs = {
        "max_output_tokens": 65536,
        "temperature": 0.0,
    }
    generation_contents = []
    if cache_id:
        # Files and system instruction are already part of the cache.
        config_kwargs["cached_content"] = cache_id
    else:
        generation_contents.extend(file_objects)
        config_kwargs["system_instruction"] = _SYSTEM_INSTRUCTION
    generation_contents.append(prompt_text)

    print("Generating response (this may take a moment for large outputs)...")
    response_stream = client.models.generate_content_stream(
        model=model,
        contents=generation_contents,
        config=types.GenerateContentConfig(**config_kwargs),
    )
    if output_path:
        # Explicit UTF-8 so model output never depends on the locale encoding.
        with open(output_path, "w", encoding="utf-8") as f:
            for chunk in response_stream:
                if chunk.text:
                    f.write(chunk.text)
                    f.flush()
        print(f"\nDone! Raw output saved directly to {output_path}")
    else:
        print("-" * 40)
        for chunk in response_stream:
            if chunk.text:
                print(chunk.text, end="", flush=True)
        print("\n" + "-" * 40)


def main():
    """Run the Gemini CLI: upload files, maintain a context cache, answer a prompt.

    Flow:
      1. Parse arguments; the prompt is ``-p`` or, failing that, the remaining
         positional words joined by spaces.
      2. Exit with status 1 if ``GEMINI_API_KEY`` is not set.
      3. Load the context file (``-c``) if one exists.
      4. With ``-d``: delete recorded caches and files plus the context file,
         then return.
      5. Otherwise upload ``-f`` files, create or reuse the per-model cache,
         and stream the answer when a prompt was given.
      6. Without ``-c`` (transient mode) every remote resource created or
         recorded during the run is deleted afterwards, even on error.
    """
    args = _build_parser().parse_args()
    if args.prompt:
        prompt_text = args.prompt
    elif args.positional_prompt:
        prompt_text = " ".join(args.positional_prompt)
    else:
        prompt_text = None

    if not os.environ.get("GEMINI_API_KEY"):
        print("Error: GEMINI_API_KEY environment variable is not set.", file=sys.stderr)
        sys.exit(1)

    client = genai.Client()
    context_data = _load_context(args.context)

    # ---------------------------------------------------------
    # DESTROY FLAG LOGIC
    # ---------------------------------------------------------
    if args.destroy:
        print("Destroying server resources and local context...")
        _delete_remote_resources(client, context_data)
        if args.context and os.path.exists(args.context):
            os.remove(args.context)
            print(f"Deleted local context file: {args.context}")
        print("Cleanup complete.")
        return

    try:
        # ---------------------------------------------------------
        # UPLOAD LOGIC
        # ---------------------------------------------------------
        if args.files:
            _upload_files(client, args.files, context_data)
            _save_context(args.context, context_data)

        # ---------------------------------------------------------
        # CACHE CREATION LOGIC
        # ---------------------------------------------------------
        file_objects = []
        active_cache_id = None
        if context_data.get("file_ids"):
            # NOTE(review): a recorded file id that no longer exists server-side
            # makes files.get raise here — confirm whether stale ids should be
            # pruned instead of aborting the run.
            file_objects = [client.files.get(name=f_id) for f_id in context_data["file_ids"]]
            active_cache_id = _ensure_cache(
                client, args.model, context_data, file_objects, args.context
            )

        # ---------------------------------------------------------
        # GENERATION LOGIC
        # ---------------------------------------------------------
        if prompt_text:
            _generate(client, args.model, prompt_text, file_objects, active_cache_id, args.output)
    finally:
        # ---------------------------------------------------------
        # TRANSIENT MODE CLEANUP
        # ---------------------------------------------------------
        # The destroy path returned before this try block, so only the
        # presence of a context file decides whether resources are kept.
        if not args.context:
            print("\n[Transient Mode] Cleaning up resources...")
            _delete_remote_resources(client, context_data)
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()