From 7655a501d05098dd6a822835958e0e08a18ed72e Mon Sep 17 00:00:00 2001
From: Michael Behr <mkbehr@gmail.com>
Date: Tue, 13 Jan 2026 19:33:18 -0500
Subject: [PATCH 1/3] feat(openai-image-gen): add model-specific parameter
 support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Apply model-specific defaults for size/quality when --size/--quality are omitted
- Add --background, --output-format, and --style parameters
- Enforce dall-e-3 count=1 limitation with automatic adjustment
- Omit quality parameter for dall-e-2 (not supported)
- Document model-specific parameters and supported values

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 skills/openai-image-gen/SKILL.md       | 40 +++++++++++++
 skills/openai-image-gen/scripts/gen.py | 79 +++++++++++++++++++++-----
 2 files changed, 105 insertions(+), 14 deletions(-)

diff --git a/skills/openai-image-gen/SKILL.md b/skills/openai-image-gen/SKILL.md
index 627d3aef5..0d5f32492 100644
--- a/skills/openai-image-gen/SKILL.md
+++ b/skills/openai-image-gen/SKILL.md
@@ -19,11 +19,51 @@ open ~/Projects/tmp/openai-image-gen-*/index.html  # if ~/Projects/tmp exists; e
 Useful flags:
 
 ```bash
+# GPT image models with various options
 python3 {baseDir}/scripts/gen.py --count 16 --model gpt-image-1
 python3 {baseDir}/scripts/gen.py --prompt "ultra-detailed studio photo of a lobster astronaut" --count 4
 python3 {baseDir}/scripts/gen.py --size 1536x1024 --quality high --out-dir ./out/images
+python3 {baseDir}/scripts/gen.py --model gpt-image-1.5 --background transparent --output-format webp
+
+# DALL-E 3 (note: count is automatically limited to 1)
+python3 {baseDir}/scripts/gen.py --model dall-e-3 --quality hd --size 1792x1024 --style vivid
+python3 {baseDir}/scripts/gen.py --model dall-e-3 --style natural --prompt "serene mountain landscape"
+
+# DALL-E 2
+python3 {baseDir}/scripts/gen.py --model dall-e-2 --size 512x512 --count 4
 ```
 
+## Model-Specific Parameters
+
+Different models support different parameter values. The script automatically selects appropriate defaults based on the model.
+
+### Size
+
+- **GPT image models** (`gpt-image-1`, `gpt-image-1-mini`, `gpt-image-1.5`): `1024x1024`, `1536x1024` (landscape), `1024x1536` (portrait), or `auto`
+  - Default: `1024x1024`
+- **dall-e-3**: `1024x1024`, `1792x1024`, or `1024x1792`
+  - Default: `1024x1024`
+- **dall-e-2**: `256x256`, `512x512`, or `1024x1024`
+  - Default: `1024x1024`
+
+### Quality
+
+- **GPT image models**: `auto`, `high`, `medium`, or `low`
+  - Default: `high`
+- **dall-e-3**: `hd` or `standard`
+  - Default: `standard`
+- **dall-e-2**: `standard` only (the script omits the `quality` parameter for this model, so `--quality` has no effect)
+  - Default: `standard`
+
+### Other Notable Differences
+
+- **dall-e-3** only supports generating 1 image at a time (`n=1`). The script automatically limits count to 1 when using this model.
+- **GPT image models** support additional parameters:
+  - `--background`: `transparent`, `opaque`, or `auto` (default)
+  - `--output-format`: `png` (default), `jpeg`, or `webp`
+  - Note: `stream` and `moderation` are available via API but not yet implemented in this script
+- **dall-e-3** has a `--style` parameter: `vivid` (hyper-real, dramatic) or `natural` (more natural looking)
+
 ## Output
 
 - `*.png` images
diff --git a/skills/openai-image-gen/scripts/gen.py b/skills/openai-image-gen/scripts/gen.py
index 0f2c59188..b8dd0b4c0 100644
--- a/skills/openai-image-gen/scripts/gen.py
+++ b/skills/openai-image-gen/scripts/gen.py
@@ -62,24 +62,53 @@ def pick_prompts(count: int) -> list[str]:
     return prompts
 
 
+def get_model_defaults(model: str) -> tuple[str, str]:
+    """Return (default_size, default_quality) for the given model."""
+    if model == "dall-e-2":
+        # quality will be ignored
+        return ("1024x1024", "standard")
+    elif model == "dall-e-3":
+        return ("1024x1024", "hd")
+    else:
+        # GPT image or future models
+        return ("1024x1024", "high")
+
+
 def request_images(
     api_key: str,
     prompt: str,
     model: str,
     size: str,
     quality: str,
+    background: str = "",
+    output_format: str = "",
+    style: str = "",
 ) -> dict:
     url = "https://api.openai.com/v1/images/generations"
-    body = json.dumps(
-        {
-            "model": model,
-            "prompt": prompt,
-            "size": size,
-            "quality": quality,
-            "n": 1,
-            "response_format": "b64_json",
-        }
-    ).encode("utf-8")
+    args = {
+        "model": model,
+        "prompt": prompt,
+        "size": size,
+        "n": 1,
+    }
+
+    # Quality parameter - dall-e-2 doesn't accept this parameter
+    if model != "dall-e-2":
+        args["quality"] = quality
+
+    if model.startswith("dall-e"):
+        args["response_format"] = "b64_json"
+
+    if model.startswith("gpt-image"):
+        if background:
+            args["background"] = background
+        if output_format:
+            args["output_format"] = output_format
+
+    if model == "dall-e-3" and style:
+        args["style"] = style
+
+    body = json.dumps(args).encode("utf-8")
     req = urllib.request.Request(
         url,
         method="POST",
@@ -136,8 +165,11 @@ def main() -> int:
     ap.add_argument("--prompt", help="Single prompt. If omitted, random prompts are generated.")
     ap.add_argument("--count", type=int, default=8, help="How many images to generate.")
     ap.add_argument("--model", default="gpt-image-1", help="Image model id.")
-    ap.add_argument("--size", default="1024x1024", help="Image size (e.g. 1024x1024, 1536x1024).")
-    ap.add_argument("--quality", default="high", help="Image quality (varies by model).")
+    ap.add_argument("--size", default="", help="Image size (e.g. 1024x1024, 1536x1024). Defaults based on model if not specified.")
+    ap.add_argument("--quality", default="", help="Image quality (e.g. high, standard). Defaults based on model if not specified.")
+    ap.add_argument("--background", default="", help="Background transparency (GPT models only): transparent, opaque, or auto.")
+    ap.add_argument("--output-format", default="", help="Output format (GPT models only): png, jpeg, or webp.")
+    ap.add_argument("--style", default="", help="Image style (dall-e-3 only): vivid or natural.")
     ap.add_argument("--out-dir", default="", help="Output directory (default: ./tmp/openai-image-gen-<ts>).")
     args = ap.parse_args()
 
@@ -146,15 +178,34 @@ def main() -> int:
         print("Missing OPENAI_API_KEY", file=sys.stderr)
         return 2
 
+    # Apply model-specific defaults if not specified
+    default_size, default_quality = get_model_defaults(args.model)
+    size = args.size or default_size
+    quality = args.quality or default_quality
+
+    count = args.count
+    if args.model == "dall-e-3" and count > 1:
+        print(f"Warning: dall-e-3 only supports generating 1 image at a time. Reducing count from {count} to 1.", file=sys.stderr)
+        count = 1
+
     out_dir = Path(args.out_dir).expanduser() if args.out_dir else default_out_dir()
     out_dir.mkdir(parents=True, exist_ok=True)
 
-    prompts = [args.prompt] * args.count if args.prompt else pick_prompts(args.count)
+    prompts = [args.prompt] * count if args.prompt else pick_prompts(count)
 
     items: list[dict] = []
     for idx, prompt in enumerate(prompts, start=1):
         print(f"[{idx}/{len(prompts)}] {prompt}")
-        res = request_images(api_key, prompt, args.model, args.size, args.quality)
+        res = request_images(
+            api_key,
+            prompt,
+            args.model,
+            size,
+            quality,
+            args.background,
+            args.output_format,
+            args.style,
+        )
         b64 = res.get("data", [{}])[0].get("b64_json")
         if not b64:
             raise RuntimeError(f"Unexpected response: {json.dumps(res)[:400]}")

From 6ac1c1d6ea0359069cf8d4c6e73ea20c0dad54e2 Mon Sep 17 00:00:00 2001
From: Michael Behr <mkbehr@gmail.com>
Date: Tue, 13 Jan 2026 19:59:17 -0500
Subject: [PATCH 2/3] fix(openai-image-gen): use correct file extension for
 output format
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When --output-format is specified for GPT models, save files with
the correct extension (.webp, .jpeg, or .png) instead of always
using .png.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 skills/openai-image-gen/scripts/gen.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/skills/openai-image-gen/scripts/gen.py b/skills/openai-image-gen/scripts/gen.py
index b8dd0b4c0..d09c33bbc 100644
--- a/skills/openai-image-gen/scripts/gen.py
+++ b/skills/openai-image-gen/scripts/gen.py
@@ -193,6 +193,12 @@ def main() -> int:
 
     prompts = [args.prompt] * count if args.prompt else pick_prompts(count)
 
+    # Determine file extension based on output format
+    if args.model.startswith("gpt-image") and args.output_format:
+        file_ext = args.output_format
+    else:
+        file_ext = "png"
+
     items: list[dict] = []
     for idx, prompt in enumerate(prompts, start=1):
         print(f"[{idx}/{len(prompts)}] {prompt}")
@@ -209,9 +215,9 @@ def main() -> int:
         b64 = res.get("data", [{}])[0].get("b64_json")
         if not b64:
             raise RuntimeError(f"Unexpected response: {json.dumps(res)[:400]}")
-        png = base64.b64decode(b64)
-        filename = f"{idx:03d}-{slugify(prompt)[:40]}.png"
-        (out_dir / filename).write_bytes(png)
+        image_bytes = base64.b64decode(b64)
+        filename = f"{idx:03d}-{slugify(prompt)[:40]}.{file_ext}"
+        (out_dir / filename).write_bytes(image_bytes)
         items.append({"prompt": prompt, "file": filename})
 
     (out_dir / "prompts.json").write_text(json.dumps(items, indent=2), encoding="utf-8")

From b3ab24eb8e64f6c4bd66a7fca48658b0b0c7962e Mon Sep 17 00:00:00 2001
From: Peter Steinberger <steipete@gmail.com>
Date: Fri, 16 Jan 2026 08:41:23 +0000
Subject: [PATCH 3/3] fix: align image-gen defaults (#880) (thanks @mkbehr)

---
 CHANGELOG.md                           | 1 +
 skills/openai-image-gen/SKILL.md       | 2 +-
 skills/openai-image-gen/scripts/gen.py | 2 +-
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9601811b1..31b0d4011 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,6 +12,7 @@
 - TUI: show provider/model labels for the active session and default model.
 - Heartbeat: add per-agent heartbeat configuration and multi-agent docs example.
 - Fix: list model picker entries as provider/model pairs for explicit selection. (#970) — thanks @mcinteerj.
+- Fix: align OpenAI image-gen defaults with DALL-E 3 standard quality and document output formats. (#880) — thanks @mkbehr.
 - Fix: persist `gateway.mode=local` after selecting Local run mode in `clawdbot configure`, even if no other sections are chosen.
 - Daemon: fix profile-aware service label resolution (env-driven) and add coverage for launchd/systemd/schtasks. (#969) — thanks @bjesuiter.
 - Daemon: share profile/state-dir resolution across service helpers and honor `CLAWDBOT_STATE_DIR` for Windows task scripts.
diff --git a/skills/openai-image-gen/SKILL.md b/skills/openai-image-gen/SKILL.md
index 0d5f32492..d1ebb1236 100644
--- a/skills/openai-image-gen/SKILL.md
+++ b/skills/openai-image-gen/SKILL.md
@@ -66,6 +66,6 @@ Different models support different parameter values. The script automatically se
 
 ## Output
 
-- `*.png` images
+- `*.png`, `*.jpeg`, or `*.webp` images (`.png` by default; GPT image models use the extension given by `--output-format`)
 - `prompts.json` (prompt → file mapping)
 - `index.html` (thumbnail gallery)
diff --git a/skills/openai-image-gen/scripts/gen.py b/skills/openai-image-gen/scripts/gen.py
index d09c33bbc..8024b055b 100644
--- a/skills/openai-image-gen/scripts/gen.py
+++ b/skills/openai-image-gen/scripts/gen.py
@@ -68,7 +68,7 @@ def get_model_defaults(model: str) -> tuple[str, str]:
         # quality will be ignored
         return ("1024x1024", "standard")
     elif model == "dall-e-3":
-        return ("1024x1024", "hd")
+        return ("1024x1024", "standard")
     else:
         # GPT image or future models
         return ("1024x1024", "high")