Working on the gemini CLI

2026-06-03 15:10:19 -07:00
parent 609f672e52
commit d2209fd209
1 changed files with 47 additions and 28 deletions
@@ -103,36 +103,51 @@ def main():
        "and output raw data only. If not specific data format is suggested, you can answer with conversational text."
    )

+    cache_too_small = False
+    file_objects = []
+
    if context_data.get("file_ids"):
+        # We always need the file objects just in case caching fails
+        file_objects = [client.files.get(name=f_id) for f_id in context_data["file_ids"]]
+
        if not context_data.get("cache_id"):
-            print("Creating Context Cache on Google's servers...")
+            print("Attempting to create Context Cache on Google's servers...")
            
-            # Retrieve the file objects
-            file_objects = [client.files.get(name=f_id) for f_id in context_data["file_ids"]]
-            
-            # Create the server-side cache (Set to expire in 3600 seconds / 60 minutes)
-            cache = client.caches.create(
-                model=args.model,
-                config=types.CreateCachedContentConfig(
-                    contents=file_objects,
-                    system_instruction=system_instruction,
-                    ttl="3600s" 
+            try:
+                # Attempt to create the cache
+                cache = client.caches.create(
+                    model=args.model,
+                    config=types.CreateCachedContentConfig(
+                        contents=file_objects,
+                        system_instruction=system_instruction,
+                        ttl="3600s" 
+                    )
                )
-            )
-            context_data["cache_id"] = cache.name
-            with open(args.context, "w") as f:
-                json.dump(context_data, f, indent=4)
-            print(f"Context Cache created: {cache.name}")
+                context_data["cache_id"] = cache.name
+                with open(args.context, "w") as f:
+                    json.dump(context_data, f, indent=4)
+                print(f"Context Cache created: {cache.name}")
            
-        else:
+            except Exception as e:
+                # Catch the specific size error and fall back
+                if "too small" in str(e).lower() or "1024" in str(e):
+                    print("Notice: Files are too small for server-side caching (under 1024 tokens). Falling back to standard processing.")
+                    cache_too_small = True
+                else:
+                    raise e # Reraise if it's a different error (like authentication)
+            
+        elif not cache_too_small:
            print(f"Loading existing cache: {context_data['cache_id']}")
            print("Extending cache TTL by 60 minutes...")
-            client.caches.update(
-                name=context_data["cache_id"],
-                config=types.UpdateCachedContentConfig(
-                    ttl="3600s"
+            try:
+                client.caches.update(
+                    name=context_data["cache_id"],
+                    config=types.UpdateCachedContentConfig(
+                        ttl="3600s"
+                    )
                )
-            )
+            except Exception as e:
+                 print(f"Warning: Failed to update cache TTL. It may have expired. {e}")

    # ---------------------------------------------------------
    # GENERATION LOGIC
@@ -143,21 +158,25 @@ def main():
            "temperature": 0.0
        }

-        # The new SDK passes the cache ID directly into the generation config.
-        # If caching is used, the system_instruction must be in the cache, not here.
-        if context_data.get("cache_id"):
+        # Setup contents array
+        generation_contents = []
+        
+        if context_data.get("cache_id") and not cache_too_small:
+            # If we successfully cached, we just pass the cache ID in the config
            config_kwargs["cached_content"] = context_data["cache_id"]
        else:
+            # If we didn't cache (or it was too small), pass the files and system instruction directly
+            generation_contents.extend(file_objects)
            config_kwargs["system_instruction"] = system_instruction
            
+        generation_contents.append(prompt_text)
        config = types.GenerateContentConfig(**config_kwargs)

        print("Generating response (this may take a moment for large outputs)...")
        
-        # We use stream generation so it writes immediately, avoiding memory bottlenecks
        response_stream = client.models.generate_content_stream(
            model=args.model,
-            contents=prompt_text,
+            contents=generation_contents,
            config=config
        )
        
@@ -166,7 +185,7 @@ def main():
                for chunk in response_stream:
                    if chunk.text:
                        f.write(chunk.text)
-                        f.flush() # Stream direct to the disk in real-time
+                        f.flush()
            print(f"\nDone! Raw output saved directly to {args.output}")
        else:
            print("-" * 40)