From 7655a501d05098dd6a822835958e0e08a18ed72e Mon Sep 17 00:00:00 2001 From: Michael Behr Date: Tue, 13 Jan 2026 19:33:18 -0500 Subject: [PATCH 1/3] feat(openai-image-gen): add model-specific parameter support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Auto-detect model and apply appropriate defaults for size/quality - Add --background, --output-format, and --style parameters - Enforce dall-e-3 count=1 limitation with automatic adjustment - Omit quality parameter for dall-e-2 (not supported) - Document model-specific parameters and supported values 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- skills/openai-image-gen/SKILL.md | 40 +++++++++++++ skills/openai-image-gen/scripts/gen.py | 79 +++++++++++++++++++++----- 2 files changed, 105 insertions(+), 14 deletions(-) diff --git a/skills/openai-image-gen/SKILL.md b/skills/openai-image-gen/SKILL.md index 627d3aef5..0d5f32492 100644 --- a/skills/openai-image-gen/SKILL.md +++ b/skills/openai-image-gen/SKILL.md @@ -19,11 +19,51 @@ open ~/Projects/tmp/openai-image-gen-*/index.html # if ~/Projects/tmp exists; e Useful flags: ```bash +# GPT image models with various options python3 {baseDir}/scripts/gen.py --count 16 --model gpt-image-1 python3 {baseDir}/scripts/gen.py --prompt "ultra-detailed studio photo of a lobster astronaut" --count 4 python3 {baseDir}/scripts/gen.py --size 1536x1024 --quality high --out-dir ./out/images +python3 {baseDir}/scripts/gen.py --model gpt-image-1.5 --background transparent --output-format webp + +# DALL-E 3 (note: count is automatically limited to 1) +python3 {baseDir}/scripts/gen.py --model dall-e-3 --quality hd --size 1792x1024 --style vivid +python3 {baseDir}/scripts/gen.py --model dall-e-3 --style natural --prompt "serene mountain landscape" + +# DALL-E 2 +python3 {baseDir}/scripts/gen.py --model dall-e-2 --size 512x512 --count 4 ``` +## Model-Specific Parameters + +Different models support different parameter values. The script automatically selects appropriate defaults based on the model. + +### Size + +- **GPT image models** (`gpt-image-1`, `gpt-image-1-mini`, `gpt-image-1.5`): `1024x1024`, `1536x1024` (landscape), `1024x1536` (portrait), or `auto` + - Default: `1024x1024` +- **dall-e-3**: `1024x1024`, `1792x1024`, or `1024x1792` + - Default: `1024x1024` +- **dall-e-2**: `256x256`, `512x512`, or `1024x1024` + - Default: `1024x1024` + +### Quality + +- **GPT image models**: `auto`, `high`, `medium`, or `low` + - Default: `high` +- **dall-e-3**: `hd` or `standard` + - Default: `standard` +- **dall-e-2**: `standard` only + - Default: `standard` + +### Other Notable Differences + +- **dall-e-3** only supports generating 1 image at a time (`n=1`). The script automatically limits count to 1 when using this model. +- **GPT image models** support additional parameters: + - `--background`: `transparent`, `opaque`, or `auto` (default) + - `--output-format`: `png` (default), `jpeg`, or `webp` + - Note: `stream` and `moderation` are available via API but not yet implemented in this script +- **dall-e-3** has a `--style` parameter: `vivid` (hyper-real, dramatic) or `natural` (more natural looking) + ## Output - `*.png` images diff --git a/skills/openai-image-gen/scripts/gen.py b/skills/openai-image-gen/scripts/gen.py index 0f2c59188..b8dd0b4c0 100644 --- a/skills/openai-image-gen/scripts/gen.py +++ b/skills/openai-image-gen/scripts/gen.py @@ -62,24 +62,53 @@ def pick_prompts(count: int) -> list[str]: return prompts +def get_model_defaults(model: str) -> tuple[str, str]: + """Return (default_size, default_quality) for the given model.""" + if model == "dall-e-2": + # quality will be ignored + return ("1024x1024", "standard") + elif model == "dall-e-3": + return ("1024x1024", "hd") + else: + # GPT image or future models + return ("1024x1024", "high") + + def request_images( api_key: str, prompt: str, model: str, size: str, quality: str, + background: str = "", + output_format: str = "", + style: str = "", ) -> dict: url = "https://api.openai.com/v1/images/generations" - body = json.dumps( - { - "model": model, - "prompt": prompt, - "size": size, - "quality": quality, - "n": 1, - "response_format": "b64_json", - } - ).encode("utf-8") + args = { + "model": model, + "prompt": prompt, + "size": size, + "n": 1, + } + + # Quality parameter - dall-e-2 doesn't accept this parameter + if model != "dall-e-2": + args["quality"] = quality + + if model.startswith("dall-e"): + args["response_format"] = "b64_json" + + if model.startswith("gpt-image"): + if background: + args["background"] = background + if output_format: + args["output_format"] = output_format + + if model == "dall-e-3" and style: + args["style"] = style + + body = json.dumps(args).encode("utf-8") req = urllib.request.Request( url, method="POST", @@ -136,8 +165,11 @@ def main() -> int: ap.add_argument("--prompt", help="Single prompt. If omitted, random prompts are generated.") ap.add_argument("--count", type=int, default=8, help="How many images to generate.") ap.add_argument("--model", default="gpt-image-1", help="Image model id.") - ap.add_argument("--size", default="1024x1024", help="Image size (e.g. 1024x1024, 1536x1024).") - ap.add_argument("--quality", default="high", help="Image quality (varies by model).") + ap.add_argument("--size", default="", help="Image size (e.g. 1024x1024, 1536x1024). Defaults based on model if not specified.") + ap.add_argument("--quality", default="", help="Image quality (e.g. high, standard). Defaults based on model if not specified.") + ap.add_argument("--background", default="", help="Background transparency (GPT models only): transparent, opaque, or auto.") + ap.add_argument("--output-format", default="", help="Output format (GPT models only): png, jpeg, or webp.") + ap.add_argument("--style", default="", help="Image style (dall-e-3 only): vivid or natural.") ap.add_argument("--out-dir", default="", help="Output directory (default: ./tmp/openai-image-gen-).") args = ap.parse_args() @@ -146,15 +178,34 @@ def main() -> int: print("Missing OPENAI_API_KEY", file=sys.stderr) return 2 + # Apply model-specific defaults if not specified + default_size, default_quality = get_model_defaults(args.model) + size = args.size or default_size + quality = args.quality or default_quality + + count = args.count + if args.model == "dall-e-3" and count > 1: + print(f"Warning: dall-e-3 only supports generating 1 image at a time. Reducing count from {count} to 1.", file=sys.stderr) + count = 1 + out_dir = Path(args.out_dir).expanduser() if args.out_dir else default_out_dir() out_dir.mkdir(parents=True, exist_ok=True) - prompts = [args.prompt] * args.count if args.prompt else pick_prompts(args.count) + prompts = [args.prompt] * count if args.prompt else pick_prompts(count) items: list[dict] = [] for idx, prompt in enumerate(prompts, start=1): print(f"[{idx}/{len(prompts)}] {prompt}") - res = request_images(api_key, prompt, args.model, args.size, args.quality) + res = request_images( + api_key, + prompt, + args.model, + size, + quality, + args.background, + args.output_format, + args.style, + ) b64 = res.get("data", [{}])[0].get("b64_json") if not b64: raise RuntimeError(f"Unexpected response: {json.dumps(res)[:400]}") From 6ac1c1d6ea0359069cf8d4c6e73ea20c0dad54e2 Mon Sep 17 00:00:00 2001 From: Michael Behr Date: Tue, 13 Jan 2026 19:59:17 -0500 Subject: [PATCH 2/3] fix(openai-image-gen): use correct file extension for output format MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When --output-format is specified for GPT models, save files with the correct extension (.webp, .jpeg, or .png) instead of always using .png. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- skills/openai-image-gen/scripts/gen.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/skills/openai-image-gen/scripts/gen.py b/skills/openai-image-gen/scripts/gen.py index b8dd0b4c0..d09c33bbc 100644 --- a/skills/openai-image-gen/scripts/gen.py +++ b/skills/openai-image-gen/scripts/gen.py @@ -193,6 +193,12 @@ def main() -> int: prompts = [args.prompt] * count if args.prompt else pick_prompts(count) + # Determine file extension based on output format + if args.model.startswith("gpt-image") and args.output_format: + file_ext = args.output_format + else: + file_ext = "png" + items: list[dict] = [] for idx, prompt in enumerate(prompts, start=1): print(f"[{idx}/{len(prompts)}] {prompt}") @@ -209,9 +215,9 @@ def main() -> int: b64 = res.get("data", [{}])[0].get("b64_json") if not b64: raise RuntimeError(f"Unexpected response: {json.dumps(res)[:400]}") - png = base64.b64decode(b64) - filename = f"{idx:03d}-{slugify(prompt)[:40]}.png" - (out_dir / filename).write_bytes(png) + image_bytes = base64.b64decode(b64) + filename = f"{idx:03d}-{slugify(prompt)[:40]}.{file_ext}" + (out_dir / filename).write_bytes(image_bytes) items.append({"prompt": prompt, "file": filename}) (out_dir / "prompts.json").write_text(json.dumps(items, indent=2), encoding="utf-8") From b3ab24eb8e64f6c4bd66a7fca48658b0b0c7962e Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Fri, 16 Jan 2026 08:41:23 +0000 Subject: [PATCH 3/3] fix: align image-gen defaults (#880) (thanks @mkbehr) --- CHANGELOG.md | 1 + skills/openai-image-gen/SKILL.md | 2 +- skills/openai-image-gen/scripts/gen.py | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9601811b1..31b0d4011 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ - TUI: show provider/model labels for the active session and default model. - Heartbeat: add per-agent heartbeat configuration and multi-agent docs example. - Fix: list model picker entries as provider/model pairs for explicit selection. (#970) — thanks @mcinteerj. +- Fix: align OpenAI image-gen defaults with DALL-E 3 standard quality and document output formats. (#880) — thanks @mkbehr. - Fix: persist `gateway.mode=local` after selecting Local run mode in `clawdbot configure`, even if no other sections are chosen. - Daemon: fix profile-aware service label resolution (env-driven) and add coverage for launchd/systemd/schtasks. (#969) — thanks @bjesuiter. - Daemon: share profile/state-dir resolution across service helpers and honor `CLAWDBOT_STATE_DIR` for Windows task scripts. diff --git a/skills/openai-image-gen/SKILL.md b/skills/openai-image-gen/SKILL.md index 0d5f32492..d1ebb1236 100644 --- a/skills/openai-image-gen/SKILL.md +++ b/skills/openai-image-gen/SKILL.md @@ -66,6 +66,6 @@ Different models support different parameter values. The script automatically se ## Output -- `*.png` images +- `*.png`, `*.jpeg`, or `*.webp` images (output format depends on model + `--output-format`) - `prompts.json` (prompt → file mapping) - `index.html` (thumbnail gallery) diff --git a/skills/openai-image-gen/scripts/gen.py b/skills/openai-image-gen/scripts/gen.py index d09c33bbc..8024b055b 100644 --- a/skills/openai-image-gen/scripts/gen.py +++ b/skills/openai-image-gen/scripts/gen.py @@ -68,7 +68,7 @@ def get_model_defaults(model: str) -> tuple[str, str]: # quality will be ignored return ("1024x1024", "standard") elif model == "dall-e-3": - return ("1024x1024", "hd") + return ("1024x1024", "standard") else: # GPT image or future models return ("1024x1024", "high")