Working on the gemini CLI

This commit is contained in:
2026-06-03 15:10:19 -07:00
parent 609f672e52
commit d2209fd209
+47 -28
View File
@@ -103,36 +103,51 @@ def main():
"and output raw data only. If not specific data format is suggested, you can answer with conversational text."
)
cache_too_small = False
file_objects = []
if context_data.get("file_ids"):
# We always need the file objects just in case caching fails
file_objects = [client.files.get(name=f_id) for f_id in context_data["file_ids"]]
if not context_data.get("cache_id"):
print("Creating Context Cache on Google's servers...")
print("Attempting to create Context Cache on Google's servers...")
# Retrieve the file objects
file_objects = [client.files.get(name=f_id) for f_id in context_data["file_ids"]]
# Create the server-side cache (Set to expire in 3600 seconds / 60 minutes)
cache = client.caches.create(
model=args.model,
config=types.CreateCachedContentConfig(
contents=file_objects,
system_instruction=system_instruction,
ttl="3600s"
try:
# Attempt to create the cache
cache = client.caches.create(
model=args.model,
config=types.CreateCachedContentConfig(
contents=file_objects,
system_instruction=system_instruction,
ttl="3600s"
)
)
)
context_data["cache_id"] = cache.name
with open(args.context, "w") as f:
json.dump(context_data, f, indent=4)
print(f"Context Cache created: {cache.name}")
context_data["cache_id"] = cache.name
with open(args.context, "w") as f:
json.dump(context_data, f, indent=4)
print(f"Context Cache created: {cache.name}")
else:
except Exception as e:
# Catch the specific size error and fall back
if "too small" in str(e).lower() or "1024" in str(e):
print("Notice: Files are too small for server-side caching (under 1024 tokens). Falling back to standard processing.")
cache_too_small = True
else:
raise e # Reraise if it's a different error (like authentication)
elif not cache_too_small:
print(f"Loading existing cache: {context_data['cache_id']}")
print("Extending cache TTL by 60 minutes...")
client.caches.update(
name=context_data["cache_id"],
config=types.UpdateCachedContentConfig(
ttl="3600s"
try:
client.caches.update(
name=context_data["cache_id"],
config=types.UpdateCachedContentConfig(
ttl="3600s"
)
)
)
except Exception as e:
print(f"Warning: Failed to update cache TTL. It may have expired. {e}")
# ---------------------------------------------------------
# GENERATION LOGIC
@@ -143,21 +158,25 @@ def main():
"temperature": 0.0
}
# The new SDK passes the cache ID directly into the generation config.
# If caching is used, the system_instruction must be in the cache, not here.
if context_data.get("cache_id"):
# Setup contents array
generation_contents = []
if context_data.get("cache_id") and not cache_too_small:
# If we successfully cached, we just pass the cache ID in the config
config_kwargs["cached_content"] = context_data["cache_id"]
else:
# If we didn't cache (or it was too small), pass the files and system instruction directly
generation_contents.extend(file_objects)
config_kwargs["system_instruction"] = system_instruction
generation_contents.append(prompt_text)
config = types.GenerateContentConfig(**config_kwargs)
print("Generating response (this may take a moment for large outputs)...")
# We use stream generation so it writes immediately, avoiding memory bottlenecks
response_stream = client.models.generate_content_stream(
model=args.model,
contents=prompt_text,
contents=generation_contents,
config=config
)
@@ -166,7 +185,7 @@ def main():
for chunk in response_stream:
if chunk.text:
f.write(chunk.text)
f.flush() # Stream direct to the disk in real-time
f.flush()
print(f"\nDone! Raw output saved directly to {args.output}")
else:
print("-" * 40)