docs: update SKILL.md and generate_image.py to support multi-image editing and improve input handling
This commit is contained in:
@@ -14,9 +14,14 @@ Generate
|
|||||||
uv run {baseDir}/scripts/generate_image.py --prompt "your image description" --filename "output.png" --resolution 1K
|
uv run {baseDir}/scripts/generate_image.py --prompt "your image description" --filename "output.png" --resolution 1K
|
||||||
```
|
```
|
||||||
|
|
||||||
Edit
|
Edit (single image)
|
||||||
```bash
|
```bash
|
||||||
uv run {baseDir}/scripts/generate_image.py --prompt "edit instructions" --filename "output.png" --input-image "/path/in.png" --resolution 2K
|
uv run {baseDir}/scripts/generate_image.py --prompt "edit instructions" --filename "output.png" -i "/path/in.png" --resolution 2K
|
||||||
|
```
|
||||||
|
|
||||||
|
Multi-image composition (up to 14 images)
|
||||||
|
```bash
|
||||||
|
uv run {baseDir}/scripts/generate_image.py --prompt "combine these into one scene" --filename "output.png" -i img1.png -i img2.png -i img3.png
|
||||||
```
|
```
|
||||||
|
|
||||||
API key
|
API key
|
||||||
|
|||||||
@@ -11,6 +11,9 @@ Generate images using Google's Nano Banana Pro (Gemini 3 Pro Image) API.
|
|||||||
|
|
||||||
Usage:
|
Usage:
|
||||||
uv run generate_image.py --prompt "your image description" --filename "output.png" [--resolution 1K|2K|4K] [--api-key KEY]
|
uv run generate_image.py --prompt "your image description" --filename "output.png" [--resolution 1K|2K|4K] [--api-key KEY]
|
||||||
|
|
||||||
|
Multi-image editing (up to 14 images):
|
||||||
|
uv run generate_image.py --prompt "combine these images" --filename "output.png" -i img1.png -i img2.png -i img3.png
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
@@ -42,7 +45,10 @@ def main():
|
|||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--input-image", "-i",
|
"--input-image", "-i",
|
||||||
help="Optional input image path for editing/modification"
|
action="append",
|
||||||
|
dest="input_images",
|
||||||
|
metavar="IMAGE",
|
||||||
|
help="Input image path(s) for editing/composition. Can be specified multiple times (up to 14 images)."
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--resolution", "-r",
|
"--resolution", "-r",
|
||||||
@@ -78,34 +84,43 @@ def main():
|
|||||||
output_path = Path(args.filename)
|
output_path = Path(args.filename)
|
||||||
output_path.parent.mkdir(parents=True, exist_ok=True)
|
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
# Load input image if provided
|
# Load input images if provided (up to 14 supported by Nano Banana Pro)
|
||||||
input_image = None
|
input_images = []
|
||||||
output_resolution = args.resolution
|
output_resolution = args.resolution
|
||||||
if args.input_image:
|
if args.input_images:
|
||||||
try:
|
if len(args.input_images) > 14:
|
||||||
input_image = PILImage.open(args.input_image)
|
print(f"Error: Too many input images ({len(args.input_images)}). Maximum is 14.", file=sys.stderr)
|
||||||
print(f"Loaded input image: {args.input_image}")
|
|
||||||
|
|
||||||
# Auto-detect resolution if not explicitly set by user
|
|
||||||
if args.resolution == "1K": # Default value
|
|
||||||
# Map input image size to resolution
|
|
||||||
width, height = input_image.size
|
|
||||||
max_dim = max(width, height)
|
|
||||||
if max_dim >= 3000:
|
|
||||||
output_resolution = "4K"
|
|
||||||
elif max_dim >= 1500:
|
|
||||||
output_resolution = "2K"
|
|
||||||
else:
|
|
||||||
output_resolution = "1K"
|
|
||||||
print(f"Auto-detected resolution: {output_resolution} (from input {width}x{height})")
|
|
||||||
except Exception as e:
|
|
||||||
print(f"Error loading input image: {e}", file=sys.stderr)
|
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
# Build contents (image first if editing, prompt only if generating)
|
max_input_dim = 0
|
||||||
if input_image:
|
for img_path in args.input_images:
|
||||||
contents = [input_image, args.prompt]
|
try:
|
||||||
print(f"Editing image with resolution {output_resolution}...")
|
img = PILImage.open(img_path)
|
||||||
|
input_images.append(img)
|
||||||
|
print(f"Loaded input image: {img_path}")
|
||||||
|
|
||||||
|
# Track largest dimension for auto-resolution
|
||||||
|
width, height = img.size
|
||||||
|
max_input_dim = max(max_input_dim, width, height)
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Error loading input image '{img_path}': {e}", file=sys.stderr)
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
# Auto-detect resolution from largest input if not explicitly set
|
||||||
|
if args.resolution == "1K" and max_input_dim > 0: # Default value
|
||||||
|
if max_input_dim >= 3000:
|
||||||
|
output_resolution = "4K"
|
||||||
|
elif max_input_dim >= 1500:
|
||||||
|
output_resolution = "2K"
|
||||||
|
else:
|
||||||
|
output_resolution = "1K"
|
||||||
|
print(f"Auto-detected resolution: {output_resolution} (from max input dimension {max_input_dim})")
|
||||||
|
|
||||||
|
# Build contents (images first if editing, prompt only if generating)
|
||||||
|
if input_images:
|
||||||
|
contents = [*input_images, args.prompt]
|
||||||
|
img_count = len(input_images)
|
||||||
|
print(f"Processing {img_count} image{'s' if img_count > 1 else ''} with resolution {output_resolution}...")
|
||||||
else:
|
else:
|
||||||
contents = args.prompt
|
contents = args.prompt
|
||||||
print(f"Generating image with resolution {output_resolution}...")
|
print(f"Generating image with resolution {output_resolution}...")
|
||||||
|
|||||||
Reference in New Issue
Block a user