#!/usr/bin/env python3 # /// script # requires-python = ">=3.10" # dependencies = [ # "google-genai>=1.0.0", # "pillow>=10.0.0", # ] # /// """ Generate images using Google's Nano Banana Pro (Gemini 3 Pro Image) API. Usage: uv run generate_image.py --prompt "your image description" --filename "output.png" [--resolution 1K|2K|4K] [--api-key KEY] Multi-image editing (up to 14 images): uv run generate_image.py --prompt "combine these images" --filename "output.png" -i img1.png -i img2.png -i img3.png """ import argparse import os import sys from pathlib import Path def get_api_key(provided_key: str | None) -> str | None: """Get API key from argument first, then environment.""" if provided_key: return provided_key return os.environ.get("GEMINI_API_KEY") def main(): parser = argparse.ArgumentParser( description="Generate images using Nano Banana Pro (Gemini 3 Pro Image)" ) parser.add_argument( "--prompt", "-p", required=True, help="Image description/prompt" ) parser.add_argument( "--filename", "-f", required=True, help="Output filename (e.g., sunset-mountains.png)" ) parser.add_argument( "--input-image", "-i", action="append", dest="input_images", metavar="IMAGE", help="Input image path(s) for editing/composition. Can be specified multiple times (up to 14 images)." ) parser.add_argument( "--resolution", "-r", choices=["1K", "2K", "4K"], default="1K", help="Output resolution: 1K (default), 2K, or 4K" ) parser.add_argument( "--api-key", "-k", help="Gemini API key (overrides GEMINI_API_KEY env var)" ) args = parser.parse_args() # Get API key api_key = get_api_key(args.api_key) if not api_key: print("Error: No API key provided.", file=sys.stderr) print("Please either:", file=sys.stderr) print(" 1. Provide --api-key argument", file=sys.stderr) print(" 2. Set GEMINI_API_KEY environment variable", file=sys.stderr) sys.exit(1) # Import here after checking API key to avoid slow import on error from google import genai from google.genai import types from PIL import Image as PILImage # Initialise client client = genai.Client(api_key=api_key) # Set up output path output_path = Path(args.filename) output_path.parent.mkdir(parents=True, exist_ok=True) # Load input images if provided (up to 14 supported by Nano Banana Pro) input_images = [] output_resolution = args.resolution if args.input_images: if len(args.input_images) > 14: print(f"Error: Too many input images ({len(args.input_images)}). Maximum is 14.", file=sys.stderr) sys.exit(1) max_input_dim = 0 for img_path in args.input_images: try: img = PILImage.open(img_path) input_images.append(img) print(f"Loaded input image: {img_path}") # Track largest dimension for auto-resolution width, height = img.size max_input_dim = max(max_input_dim, width, height) except Exception as e: print(f"Error loading input image '{img_path}': {e}", file=sys.stderr) sys.exit(1) # Auto-detect resolution from largest input if not explicitly set if args.resolution == "1K" and max_input_dim > 0: # Default value if max_input_dim >= 3000: output_resolution = "4K" elif max_input_dim >= 1500: output_resolution = "2K" else: output_resolution = "1K" print(f"Auto-detected resolution: {output_resolution} (from max input dimension {max_input_dim})") # Build contents (images first if editing, prompt only if generating) if input_images: contents = [*input_images, args.prompt] img_count = len(input_images) print(f"Processing {img_count} image{'s' if img_count > 1 else ''} with resolution {output_resolution}...") else: contents = args.prompt print(f"Generating image with resolution {output_resolution}...") try: response = client.models.generate_content( model="gemini-3-pro-image-preview", contents=contents, config=types.GenerateContentConfig( response_modalities=["TEXT", "IMAGE"], image_config=types.ImageConfig( image_size=output_resolution ) ) ) # Process response and convert to PNG image_saved = False for part in response.parts: if part.text is not None: print(f"Model response: {part.text}") elif part.inline_data is not None: # Convert inline data to PIL Image and save as PNG from io import BytesIO # inline_data.data is already bytes, not base64 image_data = part.inline_data.data if isinstance(image_data, str): # If it's a string, it might be base64 import base64 image_data = base64.b64decode(image_data) image = PILImage.open(BytesIO(image_data)) # Ensure RGB mode for PNG (convert RGBA to RGB with white background if needed) if image.mode == 'RGBA': rgb_image = PILImage.new('RGB', image.size, (255, 255, 255)) rgb_image.paste(image, mask=image.split()[3]) rgb_image.save(str(output_path), 'PNG') elif image.mode == 'RGB': image.save(str(output_path), 'PNG') else: image.convert('RGB').save(str(output_path), 'PNG') image_saved = True if image_saved: full_path = output_path.resolve() print(f"\nImage saved: {full_path}") # Clawdbot parses MEDIA tokens and will attach the file on supported providers. print(f"MEDIA: {full_path}") else: print("Error: No image was generated in the response.", file=sys.stderr) sys.exit(1) except Exception as e: print(f"Error generating image: {e}", file=sys.stderr) sys.exit(1) if __name__ == "__main__": main()