Working on the Gemini CLI

2026-06-03 15:10:19 -07:00
parent 609f672e52
commit d2209fd209
1 changed file with 47 additions and 28 deletions
@@ -103,14 +103,18 @@ def main():
        "and output raw data only. If not specific data format is suggested, you can answer with conversational text."
    )
-    if context_data.get("file_ids"):
+    cache_too_small = False
-        if not context_data.get("cache_id"):
+    file_objects = []
            print("Creating Context Cache on Google's servers...")
-            # Retrieve the file objects
+    if context_data.get("file_ids"):
        # We always need the file objects just in case caching fails
        file_objects = [client.files.get(name=f_id) for f_id in context_data["file_ids"]]
-            # Create the server-side cache (Set to expire in 3600 seconds / 60 minutes)
+        if not context_data.get("cache_id"):
            print("Attempting to create Context Cache on Google's servers...")
            try:
                # Attempt to create the cache
                cache = client.caches.create(
                    model=args.model,
                    config=types.CreateCachedContentConfig(
@@ -124,15 +128,26 @@ def main():
                    json.dump(context_data, f, indent=4)
                print(f"Context Cache created: {cache.name}")
            except Exception as e:
                # Catch the specific size error and fall back
                if "too small" in str(e).lower() or "1024" in str(e):
                    print("Notice: Files are too small for server-side caching (under 1024 tokens). Falling back to standard processing.")
                    cache_too_small = True
                else:
                    raise e # Reraise if it's a different error (like authentication)
        elif not cache_too_small:
            print(f"Loading existing cache: {context_data['cache_id']}")
            print("Extending cache TTL by 60 minutes...")
            try:
                client.caches.update(
                    name=context_data["cache_id"],
                    config=types.UpdateCachedContentConfig(
                        ttl="3600s"
                    )
                )
            except Exception as e:
                 print(f"Warning: Failed to update cache TTL. It may have expired. {e}")
    # ---------------------------------------------------------
    # GENERATION LOGIC
@@ -143,21 +158,25 @@ def main():
            "temperature": 0.0
        }
-        # The new SDK passes the cache ID directly into the generation config.
+        # Setup contents array
-        # If caching is used, the system_instruction must be in the cache, not here.
+        generation_contents = []
-        if context_data.get("cache_id"):
+        
        if context_data.get("cache_id") and not cache_too_small:
            # If we successfully cached, we just pass the cache ID in the config
            config_kwargs["cached_content"] = context_data["cache_id"]
        else:
            # If we didn't cache (or it was too small), pass the files and system instruction directly
            generation_contents.extend(file_objects)
            config_kwargs["system_instruction"] = system_instruction
        generation_contents.append(prompt_text)
        config = types.GenerateContentConfig(**config_kwargs)
        print("Generating response (this may take a moment for large outputs)...")
        # We use stream generation so it writes immediately, avoiding memory bottlenecks
        response_stream = client.models.generate_content_stream(
            model=args.model,
-            contents=prompt_text,
+            contents=generation_contents,
            config=config
        )
@@ -166,7 +185,7 @@ def main():
                for chunk in response_stream:
                    if chunk.text:
                        f.write(chunk.text)
-                        f.flush() # Stream direct to the disk in real-time
+                        f.flush()
            print(f"\nDone! Raw output saved directly to {args.output}")
        else:
            print("-" * 40)