Working on the Gemini CLI

This commit is contained in:
2026-06-03 15:10:19 -07:00
parent 609f672e52
commit d2209fd209
+30 -11
View File
@@ -103,14 +103,18 @@ def main():
"and output raw data only. If not specific data format is suggested, you can answer with conversational text." "and output raw data only. If not specific data format is suggested, you can answer with conversational text."
) )
if context_data.get("file_ids"): cache_too_small = False
if not context_data.get("cache_id"): file_objects = []
print("Creating Context Cache on Google's servers...")
# Retrieve the file objects if context_data.get("file_ids"):
# We always need the file objects just in case caching fails
file_objects = [client.files.get(name=f_id) for f_id in context_data["file_ids"]] file_objects = [client.files.get(name=f_id) for f_id in context_data["file_ids"]]
# Create the server-side cache (Set to expire in 3600 seconds / 60 minutes) if not context_data.get("cache_id"):
print("Attempting to create Context Cache on Google's servers...")
try:
# Attempt to create the cache
cache = client.caches.create( cache = client.caches.create(
model=args.model, model=args.model,
config=types.CreateCachedContentConfig( config=types.CreateCachedContentConfig(
@@ -124,15 +128,26 @@ def main():
json.dump(context_data, f, indent=4) json.dump(context_data, f, indent=4)
print(f"Context Cache created: {cache.name}") print(f"Context Cache created: {cache.name}")
except Exception as e:
# Catch the specific size error and fall back
if "too small" in str(e).lower() or "1024" in str(e):
print("Notice: Files are too small for server-side caching (under 1024 tokens). Falling back to standard processing.")
cache_too_small = True
else: else:
raise e # Reraise if it's a different error (like authentication)
elif not cache_too_small:
print(f"Loading existing cache: {context_data['cache_id']}") print(f"Loading existing cache: {context_data['cache_id']}")
print("Extending cache TTL by 60 minutes...") print("Extending cache TTL by 60 minutes...")
try:
client.caches.update( client.caches.update(
name=context_data["cache_id"], name=context_data["cache_id"],
config=types.UpdateCachedContentConfig( config=types.UpdateCachedContentConfig(
ttl="3600s" ttl="3600s"
) )
) )
except Exception as e:
print(f"Warning: Failed to update cache TTL. It may have expired. {e}")
# --------------------------------------------------------- # ---------------------------------------------------------
# GENERATION LOGIC # GENERATION LOGIC
@@ -143,21 +158,25 @@ def main():
"temperature": 0.0 "temperature": 0.0
} }
# The new SDK passes the cache ID directly into the generation config. # Setup contents array
# If caching is used, the system_instruction must be in the cache, not here. generation_contents = []
if context_data.get("cache_id"):
if context_data.get("cache_id") and not cache_too_small:
# If we successfully cached, we just pass the cache ID in the config
config_kwargs["cached_content"] = context_data["cache_id"] config_kwargs["cached_content"] = context_data["cache_id"]
else: else:
# If we didn't cache (or it was too small), pass the files and system instruction directly
generation_contents.extend(file_objects)
config_kwargs["system_instruction"] = system_instruction config_kwargs["system_instruction"] = system_instruction
generation_contents.append(prompt_text)
config = types.GenerateContentConfig(**config_kwargs) config = types.GenerateContentConfig(**config_kwargs)
print("Generating response (this may take a moment for large outputs)...") print("Generating response (this may take a moment for large outputs)...")
# We use stream generation so it writes immediately, avoiding memory bottlenecks
response_stream = client.models.generate_content_stream( response_stream = client.models.generate_content_stream(
model=args.model, model=args.model,
contents=prompt_text, contents=generation_contents,
config=config config=config
) )
@@ -166,7 +185,7 @@ def main():
for chunk in response_stream: for chunk in response_stream:
if chunk.text: if chunk.text:
f.write(chunk.text) f.write(chunk.text)
f.flush() # Stream direct to the disk in real-time f.flush()
print(f"\nDone! Raw output saved directly to {args.output}") print(f"\nDone! Raw output saved directly to {args.output}")
else: else:
print("-" * 40) print("-" * 40)