Skills/research-paper-presenter/scripts/generate_images.py

#!/usr/bin/env python3
"""
generate_images.py — Generate slide visuals with OpenAI's gpt-image-2.

Reads a prompts file (JSON list) and writes one PNG per entry into --assets.
gpt-image-2 has reasoning + strong in-image text rendering, so it is well suited
to diagrams, labelled schematics and infographics — not just decorative art.

Prompts file format (JSON):
[
  {"id": "attention_schematic",
   "prompt": "Clean technical diagram of scaled dot-product attention ...",
   "shape": "landscape"},
  {"id": "rnn_vs_transformer", "prompt": "...", "shape": "portrait"}
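]

"shape" may be "landscape" (the default), "portrait" or "square"; an optional
"transparent": true entry requests a transparent background.
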
Usage:
    export OPENAI_API_KEY=...
    python3 generate_images.py prompts.json --assets workdir/assets
    # options: --model gpt-image-2  --quality high  --style-suffix "..."  --overwrite

Each PNG is saved as <id>.png — reference that exact filename in the deck spec's
"image" field.

NOTE for Codex/Hermes: if your agent runtime already has native gpt-image-2 image
generation, you may instead generate images directly and just save them as
<id>.png into the assets dir — this script is the portable API fallback.
"""
import argparse
import base64
import json
import os
import sys
import time

# Maps a prompt entry's "shape" to the pixel dimensions passed as the API's
# `size` argument (the sizes gpt-image-2 supports).
SIZES = dict(
    landscape="1536x1024",
    portrait="1024x1536",
    square="1024x1024",
)

# House style appended verbatim to every prompt so the generated visuals share
# a coherent look that sits well on the deck background; --style-suffix
# replaces it. The leading space separates it from the prompt text.
DEFAULT_STYLE = " " + " ".join((
    "Modern flat editorial illustration / technical diagram style.",
    "Crisp vector-like shapes, generous whitespace, high contrast, legible"
    " labels, restrained palette of deep navy, cyan and violet on a near-black"
    " background.",
    "No watermark, no signature, no stock-photo look.",
))


def make_client():
    """Build the OpenAI client, exiting with a readable message on setup errors.

    The SDK is imported lazily so a missing install produces a hint instead of
    a traceback. OPENAI_API_KEY is only checked for presence here; the client
    is constructed without arguments (presumably the SDK reads the key from
    the environment itself — confirm against the SDK docs).

    Returns:
        A new ``openai.OpenAI`` instance.

    Raises:
        SystemExit: if the ``openai`` package cannot be imported or
            OPENAI_API_KEY is unset or empty.
    """
    try:
        from openai import OpenAI
    except ImportError:
        sys.exit("openai SDK not installed — run: pip install openai")
    else:
        # An empty value counts as "not set", same as a missing variable.
        if os.getenv("OPENAI_API_KEY"):
            return OpenAI()
        sys.exit("OPENAI_API_KEY is not set.")


def generate(client, model, prompt, size, quality, transparent):
    """Request a single image and return its raw decoded bytes.

    Args:
        client: object exposing ``images.generate(**kwargs)``.
        model: model name forwarded to the API.
        prompt: full text prompt.
        size: size string forwarded to the API (e.g. "1536x1024").
        quality: quality setting; omitted from the request when falsy.
        transparent: when truthy, asks for ``background="transparent"``.

    Returns:
        bytes: the base64-decoded ``b64_json`` payload of the first result.
    """
    request = {"model": model, "prompt": prompt, "size": size, "n": 1}
    # Optional settings are sent only when set, so the API's own defaults
    # apply otherwise.
    if quality:
        request["quality"] = quality
    if transparent:
        request["background"] = "transparent"
    response = client.images.generate(**request)
    first_image = response.data[0]
    return base64.b64decode(first_image.b64_json)


def main():
    """Command-line entry point: render every entry of the prompts file to a PNG.

    Arguments (parsed from ``sys.argv``):
        prompts         path to the JSON list of ``{"id", "prompt", ...}`` items
        --assets        output directory (created if missing; default "assets")
        --model         image model name (default "gpt-image-2")
        --quality       quality forwarded to the API (default "high")
        --style-suffix  text appended to every prompt (default DEFAULT_STYLE)
        --overwrite     regenerate images whose PNG already exists

    For each item the image is written to ``<assets>/<id>.png``. Existing
    files are skipped (and counted as ready) unless --overwrite is given.
    Each image gets up to three attempts with a growing pause between them;
    an item that still fails is reported and the run continues. A summary
    line with the ready/total count is printed at the end.

    Raises:
        SystemExit: from argument parsing or from ``make_client``.
        KeyError: if an item lacks "id" or "prompt".
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("prompts")
    ap.add_argument("--assets", default="assets")
    ap.add_argument("--model", default="gpt-image-2")
    ap.add_argument("--quality", default="high",
                    help="low | medium | high (gpt-image-2)")
    ap.add_argument("--style-suffix", default=DEFAULT_STYLE)
    ap.add_argument("--overwrite", action="store_true")
    args = ap.parse_args()

    os.makedirs(args.assets, exist_ok=True)
    # JSON is UTF-8; do not depend on the platform's locale encoding, which
    # would mis-decode non-ASCII prompt text on e.g. Windows.
    with open(args.prompts, encoding="utf-8") as fh:
        prompts = json.load(fh)

    client = make_client()
    max_attempts = 3
    ok = 0
    for item in prompts:
        iid = item["id"]
        out = os.path.join(args.assets, f"{iid}.png")
        if os.path.exists(out) and not args.overwrite:
            print(f"· skip {iid} (exists)")
            ok += 1
            continue
        # Unknown or missing shapes fall back to landscape.
        size = SIZES.get(item.get("shape", "landscape"), SIZES["landscape"])
        prompt = item["prompt"].strip() + args.style_suffix
        # Write to a sibling temp file and rename into place, so a failed or
        # interrupted write never leaves a truncated <id>.png that a later
        # run would skip as "exists".
        tmp = f"{out}.part"
        for attempt in range(1, max_attempts + 1):
            try:
                data = generate(client, args.model, prompt, size,
                                args.quality, item.get("transparent", False))
                with open(tmp, "wb") as fh:
                    fh.write(data)
                os.replace(tmp, out)
            except Exception as e:
                # Best-effort cleanup of a partial temp file; it usually does
                # not exist (the API call failed before anything was written).
                try:
                    os.remove(tmp)
                except OSError:
                    pass
                if attempt < max_attempts:
                    wait = 3 * attempt  # linear back-off: 3s, then 6s
                    print(f"  ! {iid} attempt {attempt} failed: {e} "
                          f"(retry in {wait}s)")
                    time.sleep(wait)
                else:
                    print(f"  ✗ {iid} gave up: {e}")
            else:
                print(f"✓ {iid}  ({size})")
                ok += 1
                break
    print(f"\n{ok}/{len(prompts)} images ready in {args.assets}/")


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()