Working on the Gemini CLI
This commit is contained in:
+30
-11
@@ -103,14 +103,18 @@ def main():
|
|||||||
"and output raw data only. If not specific data format is suggested, you can answer with conversational text."
|
"and output raw data only. If not specific data format is suggested, you can answer with conversational text."
|
||||||
)
|
)
|
||||||
|
|
||||||
if context_data.get("file_ids"):
|
cache_too_small = False
|
||||||
if not context_data.get("cache_id"):
|
file_objects = []
|
||||||
print("Creating Context Cache on Google's servers...")
|
|
||||||
|
|
||||||
# Retrieve the file objects
|
if context_data.get("file_ids"):
|
||||||
|
# We always need the file objects just in case caching fails
|
||||||
file_objects = [client.files.get(name=f_id) for f_id in context_data["file_ids"]]
|
file_objects = [client.files.get(name=f_id) for f_id in context_data["file_ids"]]
|
||||||
|
|
||||||
# Create the server-side cache (Set to expire in 3600 seconds / 60 minutes)
|
if not context_data.get("cache_id"):
|
||||||
|
print("Attempting to create Context Cache on Google's servers...")
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Attempt to create the cache
|
||||||
cache = client.caches.create(
|
cache = client.caches.create(
|
||||||
model=args.model,
|
model=args.model,
|
||||||
config=types.CreateCachedContentConfig(
|
config=types.CreateCachedContentConfig(
|
||||||
@@ -124,15 +128,26 @@ def main():
|
|||||||
json.dump(context_data, f, indent=4)
|
json.dump(context_data, f, indent=4)
|
||||||
print(f"Context Cache created: {cache.name}")
|
print(f"Context Cache created: {cache.name}")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
# Catch the specific size error and fall back
|
||||||
|
if "too small" in str(e).lower() or "1024" in str(e):
|
||||||
|
print("Notice: Files are too small for server-side caching (under 1024 tokens). Falling back to standard processing.")
|
||||||
|
cache_too_small = True
|
||||||
else:
|
else:
|
||||||
|
raise e # Reraise if it's a different error (like authentication)
|
||||||
|
|
||||||
|
elif not cache_too_small:
|
||||||
print(f"Loading existing cache: {context_data['cache_id']}")
|
print(f"Loading existing cache: {context_data['cache_id']}")
|
||||||
print("Extending cache TTL by 60 minutes...")
|
print("Extending cache TTL by 60 minutes...")
|
||||||
|
try:
|
||||||
client.caches.update(
|
client.caches.update(
|
||||||
name=context_data["cache_id"],
|
name=context_data["cache_id"],
|
||||||
config=types.UpdateCachedContentConfig(
|
config=types.UpdateCachedContentConfig(
|
||||||
ttl="3600s"
|
ttl="3600s"
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Warning: Failed to update cache TTL. It may have expired. {e}")
|
||||||
|
|
||||||
# ---------------------------------------------------------
|
# ---------------------------------------------------------
|
||||||
# GENERATION LOGIC
|
# GENERATION LOGIC
|
||||||
@@ -143,21 +158,25 @@ def main():
|
|||||||
"temperature": 0.0
|
"temperature": 0.0
|
||||||
}
|
}
|
||||||
|
|
||||||
# The new SDK passes the cache ID directly into the generation config.
|
# Setup contents array
|
||||||
# If caching is used, the system_instruction must be in the cache, not here.
|
generation_contents = []
|
||||||
if context_data.get("cache_id"):
|
|
||||||
|
if context_data.get("cache_id") and not cache_too_small:
|
||||||
|
# If we successfully cached, we just pass the cache ID in the config
|
||||||
config_kwargs["cached_content"] = context_data["cache_id"]
|
config_kwargs["cached_content"] = context_data["cache_id"]
|
||||||
else:
|
else:
|
||||||
|
# If we didn't cache (or it was too small), pass the files and system instruction directly
|
||||||
|
generation_contents.extend(file_objects)
|
||||||
config_kwargs["system_instruction"] = system_instruction
|
config_kwargs["system_instruction"] = system_instruction
|
||||||
|
|
||||||
|
generation_contents.append(prompt_text)
|
||||||
config = types.GenerateContentConfig(**config_kwargs)
|
config = types.GenerateContentConfig(**config_kwargs)
|
||||||
|
|
||||||
print("Generating response (this may take a moment for large outputs)...")
|
print("Generating response (this may take a moment for large outputs)...")
|
||||||
|
|
||||||
# We use stream generation so it writes immediately, avoiding memory bottlenecks
|
|
||||||
response_stream = client.models.generate_content_stream(
|
response_stream = client.models.generate_content_stream(
|
||||||
model=args.model,
|
model=args.model,
|
||||||
contents=prompt_text,
|
contents=generation_contents,
|
||||||
config=config
|
config=config
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -166,7 +185,7 @@ def main():
|
|||||||
for chunk in response_stream:
|
for chunk in response_stream:
|
||||||
if chunk.text:
|
if chunk.text:
|
||||||
f.write(chunk.text)
|
f.write(chunk.text)
|
||||||
f.flush() # Stream direct to the disk in real-time
|
f.flush()
|
||||||
print(f"\nDone! Raw output saved directly to {args.output}")
|
print(f"\nDone! Raw output saved directly to {args.output}")
|
||||||
else:
|
else:
|
||||||
print("-" * 40)
|
print("-" * 40)
|
||||||
|
|||||||
Reference in New Issue
Block a user