Working on the gemini CLI
This commit is contained in:
+30
-11
@@ -103,14 +103,18 @@ def main():
|
||||
"and output raw data only. If not specific data format is suggested, you can answer with conversational text."
|
||||
)
|
||||
|
||||
if context_data.get("file_ids"):
|
||||
if not context_data.get("cache_id"):
|
||||
print("Creating Context Cache on Google's servers...")
|
||||
cache_too_small = False
|
||||
file_objects = []
|
||||
|
||||
# Retrieve the file objects
|
||||
if context_data.get("file_ids"):
|
||||
# We always need the file objects just in case caching fails
|
||||
file_objects = [client.files.get(name=f_id) for f_id in context_data["file_ids"]]
|
||||
|
||||
# Create the server-side cache (Set to expire in 3600 seconds / 60 minutes)
|
||||
if not context_data.get("cache_id"):
|
||||
print("Attempting to create Context Cache on Google's servers...")
|
||||
|
||||
try:
|
||||
# Attempt to create the cache
|
||||
cache = client.caches.create(
|
||||
model=args.model,
|
||||
config=types.CreateCachedContentConfig(
|
||||
@@ -124,15 +128,26 @@ def main():
|
||||
json.dump(context_data, f, indent=4)
|
||||
print(f"Context Cache created: {cache.name}")
|
||||
|
||||
except Exception as e:
|
||||
# Catch the specific size error and fall back
|
||||
if "too small" in str(e).lower() or "1024" in str(e):
|
||||
print("Notice: Files are too small for server-side caching (under 1024 tokens). Falling back to standard processing.")
|
||||
cache_too_small = True
|
||||
else:
|
||||
raise e # Reraise if it's a different error (like authentication)
|
||||
|
||||
elif not cache_too_small:
|
||||
print(f"Loading existing cache: {context_data['cache_id']}")
|
||||
print("Extending cache TTL by 60 minutes...")
|
||||
try:
|
||||
client.caches.update(
|
||||
name=context_data["cache_id"],
|
||||
config=types.UpdateCachedContentConfig(
|
||||
ttl="3600s"
|
||||
)
|
||||
)
|
||||
except Exception as e:
|
||||
print(f"Warning: Failed to update cache TTL. It may have expired. {e}")
|
||||
|
||||
# ---------------------------------------------------------
|
||||
# GENERATION LOGIC
|
||||
@@ -143,21 +158,25 @@ def main():
|
||||
"temperature": 0.0
|
||||
}
|
||||
|
||||
# The new SDK passes the cache ID directly into the generation config.
|
||||
# If caching is used, the system_instruction must be in the cache, not here.
|
||||
if context_data.get("cache_id"):
|
||||
# Setup contents array
|
||||
generation_contents = []
|
||||
|
||||
if context_data.get("cache_id") and not cache_too_small:
|
||||
# If we successfully cached, we just pass the cache ID in the config
|
||||
config_kwargs["cached_content"] = context_data["cache_id"]
|
||||
else:
|
||||
# If we didn't cache (or it was too small), pass the files and system instruction directly
|
||||
generation_contents.extend(file_objects)
|
||||
config_kwargs["system_instruction"] = system_instruction
|
||||
|
||||
generation_contents.append(prompt_text)
|
||||
config = types.GenerateContentConfig(**config_kwargs)
|
||||
|
||||
print("Generating response (this may take a moment for large outputs)...")
|
||||
|
||||
# We use stream generation so it writes immediately, avoiding memory bottlenecks
|
||||
response_stream = client.models.generate_content_stream(
|
||||
model=args.model,
|
||||
contents=prompt_text,
|
||||
contents=generation_contents,
|
||||
config=config
|
||||
)
|
||||
|
||||
@@ -166,7 +185,7 @@ def main():
|
||||
for chunk in response_stream:
|
||||
if chunk.text:
|
||||
f.write(chunk.text)
|
||||
f.flush() # Stream direct to the disk in real-time
|
||||
f.flush()
|
||||
print(f"\nDone! Raw output saved directly to {args.output}")
|
||||
else:
|
||||
print("-" * 40)
|
||||
|
||||
Reference in New Issue
Block a user