feat(openai-image-gen): add model-specific parameter support
- Auto-detect model and apply appropriate defaults for size/quality - Add --background, --output-format, and --style parameters - Enforce dall-e-3 count=1 limitation with automatic adjustment - Omit quality parameter for dall-e-2 (not supported) - Document model-specific parameters and supported values 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
committed by
Peter Steinberger
parent
3b1b14b0b1
commit
7655a501d0
@@ -19,11 +19,51 @@ open ~/Projects/tmp/openai-image-gen-*/index.html # if ~/Projects/tmp exists; e
|
|||||||
Useful flags:
|
Useful flags:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
# GPT image models with various options
|
||||||
python3 {baseDir}/scripts/gen.py --count 16 --model gpt-image-1
|
python3 {baseDir}/scripts/gen.py --count 16 --model gpt-image-1
|
||||||
python3 {baseDir}/scripts/gen.py --prompt "ultra-detailed studio photo of a lobster astronaut" --count 4
|
python3 {baseDir}/scripts/gen.py --prompt "ultra-detailed studio photo of a lobster astronaut" --count 4
|
||||||
python3 {baseDir}/scripts/gen.py --size 1536x1024 --quality high --out-dir ./out/images
|
python3 {baseDir}/scripts/gen.py --size 1536x1024 --quality high --out-dir ./out/images
|
||||||
|
python3 {baseDir}/scripts/gen.py --model gpt-image-1.5 --background transparent --output-format webp
|
||||||
|
|
||||||
|
# DALL-E 3 (note: count is automatically limited to 1)
|
||||||
|
python3 {baseDir}/scripts/gen.py --model dall-e-3 --quality hd --size 1792x1024 --style vivid
|
||||||
|
python3 {baseDir}/scripts/gen.py --model dall-e-3 --style natural --prompt "serene mountain landscape"
|
||||||
|
|
||||||
|
# DALL-E 2
|
||||||
|
python3 {baseDir}/scripts/gen.py --model dall-e-2 --size 512x512 --count 4
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Model-Specific Parameters
|
||||||
|
|
||||||
|
Different models support different parameter values. The script automatically selects appropriate defaults based on the model.
|
||||||
|
|
||||||
|
### Size
|
||||||
|
|
||||||
|
- **GPT image models** (`gpt-image-1`, `gpt-image-1-mini`, `gpt-image-1.5`): `1024x1024`, `1536x1024` (landscape), `1024x1536` (portrait), or `auto`
|
||||||
|
- Default: `1024x1024`
|
||||||
|
- **dall-e-3**: `1024x1024`, `1792x1024`, or `1024x1792`
|
||||||
|
- Default: `1024x1024`
|
||||||
|
- **dall-e-2**: `256x256`, `512x512`, or `1024x1024`
|
||||||
|
- Default: `1024x1024`
|
||||||
|
|
||||||
|
### Quality
|
||||||
|
|
||||||
|
- **GPT image models**: `auto`, `high`, `medium`, or `low`
|
||||||
|
- Default: `high`
|
||||||
|
- **dall-e-3**: `hd` or `standard`
|
||||||
|
- Default: `standard`
|
||||||
|
- **dall-e-2**: `standard` only
|
||||||
|
- Default: `standard`
|
||||||
|
|
||||||
|
### Other Notable Differences
|
||||||
|
|
||||||
|
- **dall-e-3** only supports generating 1 image at a time (`n=1`). The script automatically limits count to 1 when using this model.
|
||||||
|
- **GPT image models** support additional parameters:
|
||||||
|
- `--background`: `transparent`, `opaque`, or `auto` (default)
|
||||||
|
- `--output-format`: `png` (default), `jpeg`, or `webp`
|
||||||
|
- Note: `stream` and `moderation` are available via API but not yet implemented in this script
|
||||||
|
- **dall-e-3** has a `--style` parameter: `vivid` (hyper-real, dramatic) or `natural` (more natural looking)
|
||||||
|
|
||||||
## Output
|
## Output
|
||||||
|
|
||||||
- `*.png` images
|
- `*.png` images
|
||||||
|
|||||||
@@ -62,24 +62,53 @@ def pick_prompts(count: int) -> list[str]:
|
|||||||
return prompts
|
return prompts
|
||||||
|
|
||||||
|
|
||||||
|
def get_model_defaults(model: str) -> tuple[str, str]:
    """Return ``(default_size, default_quality)`` for the given model id.

    Defaults match the per-model values documented in the README:
    every model defaults to a 1024x1024 size, while the default quality
    depends on the model family.
    """
    if model == "dall-e-2":
        # dall-e-2 only supports "standard" quality; the request builder
        # omits the quality parameter for this model, so this value is
        # effectively ignored.
        return ("1024x1024", "standard")
    elif model == "dall-e-3":
        # The API default for dall-e-3 is "standard"; "hd" must be
        # requested explicitly (e.g. --quality hd), per the documented
        # model-specific defaults.
        return ("1024x1024", "standard")
    else:
        # GPT image models (gpt-image-1, gpt-image-1-mini, gpt-image-1.5)
        # and any future/unknown model ids fall back to "high".
        return ("1024x1024", "high")
|
||||||
|
|
||||||
|
|
||||||
def request_images(
|
def request_images(
|
||||||
api_key: str,
|
api_key: str,
|
||||||
prompt: str,
|
prompt: str,
|
||||||
model: str,
|
model: str,
|
||||||
size: str,
|
size: str,
|
||||||
quality: str,
|
quality: str,
|
||||||
|
background: str = "",
|
||||||
|
output_format: str = "",
|
||||||
|
style: str = "",
|
||||||
) -> dict:
|
) -> dict:
|
||||||
url = "https://api.openai.com/v1/images/generations"
|
url = "https://api.openai.com/v1/images/generations"
|
||||||
body = json.dumps(
|
args = {
|
||||||
{
|
"model": model,
|
||||||
"model": model,
|
"prompt": prompt,
|
||||||
"prompt": prompt,
|
"size": size,
|
||||||
"size": size,
|
"n": 1,
|
||||||
"quality": quality,
|
}
|
||||||
"n": 1,
|
|
||||||
"response_format": "b64_json",
|
# Quality parameter - dall-e-2 doesn't accept this parameter
|
||||||
}
|
if model != "dall-e-2":
|
||||||
).encode("utf-8")
|
args["quality"] = quality
|
||||||
|
|
||||||
|
if model.startswith("dall-e"):
|
||||||
|
args["response_format"] = "b64_json"
|
||||||
|
|
||||||
|
if model.startswith("gpt-image"):
|
||||||
|
if background:
|
||||||
|
args["background"] = background
|
||||||
|
if output_format:
|
||||||
|
args["output_format"] = output_format
|
||||||
|
|
||||||
|
if model == "dall-e-3" and style:
|
||||||
|
args["style"] = style
|
||||||
|
|
||||||
|
body = json.dumps(args).encode("utf-8")
|
||||||
req = urllib.request.Request(
|
req = urllib.request.Request(
|
||||||
url,
|
url,
|
||||||
method="POST",
|
method="POST",
|
||||||
@@ -136,8 +165,11 @@ def main() -> int:
|
|||||||
ap.add_argument("--prompt", help="Single prompt. If omitted, random prompts are generated.")
|
ap.add_argument("--prompt", help="Single prompt. If omitted, random prompts are generated.")
|
||||||
ap.add_argument("--count", type=int, default=8, help="How many images to generate.")
|
ap.add_argument("--count", type=int, default=8, help="How many images to generate.")
|
||||||
ap.add_argument("--model", default="gpt-image-1", help="Image model id.")
|
ap.add_argument("--model", default="gpt-image-1", help="Image model id.")
|
||||||
ap.add_argument("--size", default="1024x1024", help="Image size (e.g. 1024x1024, 1536x1024).")
|
ap.add_argument("--size", default="", help="Image size (e.g. 1024x1024, 1536x1024). Defaults based on model if not specified.")
|
||||||
ap.add_argument("--quality", default="high", help="Image quality (varies by model).")
|
ap.add_argument("--quality", default="", help="Image quality (e.g. high, standard). Defaults based on model if not specified.")
|
||||||
|
ap.add_argument("--background", default="", help="Background transparency (GPT models only): transparent, opaque, or auto.")
|
||||||
|
ap.add_argument("--output-format", default="", help="Output format (GPT models only): png, jpeg, or webp.")
|
||||||
|
ap.add_argument("--style", default="", help="Image style (dall-e-3 only): vivid or natural.")
|
||||||
ap.add_argument("--out-dir", default="", help="Output directory (default: ./tmp/openai-image-gen-<ts>).")
|
ap.add_argument("--out-dir", default="", help="Output directory (default: ./tmp/openai-image-gen-<ts>).")
|
||||||
args = ap.parse_args()
|
args = ap.parse_args()
|
||||||
|
|
||||||
@@ -146,15 +178,34 @@ def main() -> int:
|
|||||||
print("Missing OPENAI_API_KEY", file=sys.stderr)
|
print("Missing OPENAI_API_KEY", file=sys.stderr)
|
||||||
return 2
|
return 2
|
||||||
|
|
||||||
|
# Apply model-specific defaults if not specified
|
||||||
|
default_size, default_quality = get_model_defaults(args.model)
|
||||||
|
size = args.size or default_size
|
||||||
|
quality = args.quality or default_quality
|
||||||
|
|
||||||
|
count = args.count
|
||||||
|
if args.model == "dall-e-3" and count > 1:
|
||||||
|
print(f"Warning: dall-e-3 only supports generating 1 image at a time. Reducing count from {count} to 1.", file=sys.stderr)
|
||||||
|
count = 1
|
||||||
|
|
||||||
out_dir = Path(args.out_dir).expanduser() if args.out_dir else default_out_dir()
|
out_dir = Path(args.out_dir).expanduser() if args.out_dir else default_out_dir()
|
||||||
out_dir.mkdir(parents=True, exist_ok=True)
|
out_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
prompts = [args.prompt] * args.count if args.prompt else pick_prompts(args.count)
|
prompts = [args.prompt] * count if args.prompt else pick_prompts(count)
|
||||||
|
|
||||||
items: list[dict] = []
|
items: list[dict] = []
|
||||||
for idx, prompt in enumerate(prompts, start=1):
|
for idx, prompt in enumerate(prompts, start=1):
|
||||||
print(f"[{idx}/{len(prompts)}] {prompt}")
|
print(f"[{idx}/{len(prompts)}] {prompt}")
|
||||||
res = request_images(api_key, prompt, args.model, args.size, args.quality)
|
res = request_images(
|
||||||
|
api_key,
|
||||||
|
prompt,
|
||||||
|
args.model,
|
||||||
|
size,
|
||||||
|
quality,
|
||||||
|
args.background,
|
||||||
|
args.output_format,
|
||||||
|
args.style,
|
||||||
|
)
|
||||||
b64 = res.get("data", [{}])[0].get("b64_json")
|
b64 = res.get("data", [{}])[0].get("b64_json")
|
||||||
if not b64:
|
if not b64:
|
||||||
raise RuntimeError(f"Unexpected response: {json.dumps(res)[:400]}")
|
raise RuntimeError(f"Unexpected response: {json.dumps(res)[:400]}")
|
||||||
|
|||||||
Reference in New Issue
Block a user