#!/usr/bin/env python3
"""
generate_images.py — Generate slide visuals with OpenAI's gpt-image-2.

Reads a prompts file (JSON list) and writes one PNG per entry into --assets.
gpt-image-2 has reasoning + strong in-image text rendering, so it is well suited
to diagrams, labelled schematics and infographics — not just decorative art.

Prompts file format (JSON):
    [
      {"id": "attention_schematic",
       "prompt": "Clean technical diagram of scaled dot-product attention ...",
       "shape": "landscape"},
      {"id": "rnn_vs_transformer", "prompt": "...", "shape": "portrait"}
    ]

"shape" is optional (landscape | portrait | square; default landscape). An entry
may also set "transparent": true to request a transparent background.

Usage:
    export OPENAI_API_KEY=...
    python3 generate_images.py prompts.json --assets workdir/assets
    # options: --model gpt-image-2 --quality high --style-suffix "..." --overwrite

Each PNG is saved as <id>.png — reference that exact filename in the deck spec's
"image" field. Existing files are skipped unless --overwrite is given.

NOTE for Codex/Hermes: if your agent runtime already has native gpt-image-2 image
generation, you may instead generate images directly and just save them as
<id>.png into the assets dir — this script is the portable API fallback.
"""
import argparse
import base64
import json
import os
import sys
import time
# Maps the "shape" keyword of a prompt entry to the "WIDTHxHEIGHT" size string
# sent to the image API (sizes supported by gpt-image-2).
SIZES = dict(
    landscape="1536x1024",
    portrait="1024x1536",
    square="1024x1024",
)
# House style appended to every prompt so that all generated visuals share one
# coherent look that sits well on the deck background. The leading space keeps
# it separated from the prompt text it is glued onto. Override with
# --style-suffix.
DEFAULT_STYLE = " " + " ".join((
    "Modern flat editorial illustration / technical diagram style.",
    "Crisp vector-like shapes, generous whitespace, high contrast, legible"
    " labels, restrained palette of deep navy, cyan and violet on a near-black"
    " background.",
    "No watermark, no signature, no stock-photo look.",
))
def make_client():
    """Build the OpenAI client used for image generation.

    The SDK is imported lazily so that a missing dependency produces a short,
    actionable message instead of an import traceback at module load.

    Returns:
        An ``openai.OpenAI`` instance constructed with no arguments.

    Exits:
        Terminates the process via ``sys.exit`` when the ``openai`` package
        cannot be imported, or when ``OPENAI_API_KEY`` is unset or empty.
    """
    try:
        from openai import OpenAI as client_factory
    except ImportError:
        sys.exit("openai SDK not installed — run: pip install openai")

    # Fail fast with a clear message before any request is attempted.
    if not os.environ.get("OPENAI_API_KEY"):
        sys.exit("OPENAI_API_KEY is not set.")

    return client_factory()
def generate(client, model, prompt, size, quality, transparent):
    """Request a single image and return its decoded bytes.

    Args:
        client: Object exposing ``images.generate(**kwargs)`` (an OpenAI SDK
            client).
        model: Image model name passed through to the API.
        prompt: Full text prompt for the image.
        size: Size string such as ``"1536x1024"``.
        quality: Quality setting; omitted from the request when falsy.
        transparent: When truthy, ask for a transparent background.

    Returns:
        The raw image bytes decoded from the response's base64 payload.

    Raises:
        RuntimeError: The response held no image entries, or the first entry
            carried no ``b64_json`` payload (for example a URL-only response).
            Without this check the failure would surface as an opaque
            ``IndexError``/``TypeError`` from indexing or ``b64decode``.
    """
    kwargs = {"model": model, "prompt": prompt, "size": size, "n": 1}
    # gpt-image models support quality + optional transparent background
    if quality:
        kwargs["quality"] = quality
    if transparent:
        kwargs["background"] = "transparent"

    resp = client.images.generate(**kwargs)

    images = getattr(resp, "data", None) or []
    payload = getattr(images[0], "b64_json", None) if images else None
    if not payload:
        raise RuntimeError(
            f"model {model!r} returned no base64 image data "
            "(empty or URL-only response)"
        )
    return base64.b64decode(payload)
def main():
    """Command-line entry point: render every prompt entry to ``<id>.png``.

    Parses the CLI arguments, loads the prompts file (a JSON list of objects
    with ``id`` and ``prompt``, plus optional ``shape`` and ``transparent``),
    and writes one PNG per entry into the assets directory. Entries whose
    output file already exists are skipped unless ``--overwrite`` is given.
    Each generation is tried up to three times, sleeping 3s and then 6s
    between attempts.

    Returns:
        ``0`` when every entry ended up with an image (generated or already
        present), ``1`` when at least one entry could not be generated.

    Exits:
        Terminates via ``sys.exit`` with a message when the prompts file is
        not a JSON list or an entry lacks ``id`` / a string ``prompt``.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("prompts")
    ap.add_argument("--assets", default="assets")
    ap.add_argument("--model", default="gpt-image-2")
    ap.add_argument("--quality", default="high",
                    help="low | medium | high (gpt-image-2)")
    ap.add_argument("--style-suffix", default=DEFAULT_STYLE)
    ap.add_argument("--overwrite", action="store_true")
    args = ap.parse_args()

    os.makedirs(args.assets, exist_ok=True)

    # JSON text is UTF-8; do not depend on the platform's locale encoding,
    # which would garble non-ASCII prompt text on some systems.
    with open(args.prompts, encoding="utf-8") as fh:
        prompts = json.load(fh)

    # Validate the structure up front so a malformed file fails with a clear
    # message before any API call is made, rather than a mid-run traceback.
    if not isinstance(prompts, list):
        sys.exit(f"{args.prompts}: expected a JSON list of prompt entries.")
    for index, item in enumerate(prompts):
        if not (isinstance(item, dict)
                and "id" in item
                and isinstance(item.get("prompt"), str)):
            sys.exit(f"{args.prompts}: entry {index} must be an object with "
                     "an 'id' and a string 'prompt'.")

    client = make_client()
    ok = 0
    for item in prompts:
        iid = item["id"]
        out = os.path.join(args.assets, f"{iid}.png")
        if os.path.exists(out) and not args.overwrite:
            print(f"· skip {iid} (exists)")
            ok += 1
            continue

        # Unknown or missing shapes fall back to landscape.
        size = SIZES.get(item.get("shape", "landscape"), SIZES["landscape"])
        prompt = item["prompt"].strip() + args.style_suffix

        for attempt in range(1, 4):
            try:
                data = generate(client, args.model, prompt, size,
                                args.quality, item.get("transparent", False))
                with open(out, "wb") as fh:
                    fh.write(data)
            except Exception as e:
                # Best-effort batch: report the failure and move on so one bad
                # prompt does not abort the remaining entries.
                if attempt < 3:
                    wait = 3 * attempt
                    print(f" ! {iid} attempt {attempt} failed: {e} "
                          f"(retry in {wait}s)")
                    time.sleep(wait)
                else:
                    print(f"{iid} gave up: {e}")
            else:
                print(f"{iid} ({size})")
                ok += 1
                break

    print(f"\n{ok}/{len(prompts)} images ready in {args.assets}/")
    return 0 if ok == len(prompts) else 1


if __name__ == "__main__":
    # Propagate main()'s result as the process exit status so callers and
    # shell pipelines can detect that some images were not produced.
    sys.exit(main())