Skip to content

Instantly share code, notes, and snippets.

@jamessdixon
Created January 13, 2025 01:31
Show Gist options
  • Save jamessdixon/5a70d8e5850c1dadc093ace2eb2147a5 to your computer and use it in GitHub Desktop.
Save jamessdixon/5a70d8e5850c1dadc093ace2eb2147a5 to your computer and use it in GitHub Desktop.
create_dummy_documents
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!pip install transformers diffusers torch accelerate"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"import io\n",
"import os\n",
"import requests\n",
"from diffusers import StableDiffusionPipeline\n",
"import torch\n",
"from PIL import Image\n",
"from io import BytesIO"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"def create_prompts():\n",
" prompts = []\n",
" document_types = ['hawaii drivers license with character name, picture, and address', 'english passport with character name, picture, and address', 'cambridge university student id with character name, picture, and address']\n",
" characters = ['King Arthur', 'Sir Lancelot', 'Sir Galahad', 'Sir Robin']\n",
" for document_type in document_types:\n",
" for character in characters:\n",
" prompt = f\"Create a {document_type} for {character}.\"\n",
" prompts.append(prompt)\n",
" return prompts\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"def create_pipe():\n",
" model_id = \"stabilityai/stable-diffusion-2\"\n",
" pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)\n",
" pipe.to(\"mps\")\n",
" return pipe"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"def create_image(pipe, document_number, prompt):\n",
" generated_image = pipe(prompt).images[0]\n",
" image_dir = os.path.join(os.curdir, 'data')\n",
" image_path = os.path.join(image_dir, str(document_number) + '.png')\n",
"\n",
" with open(image_path, \"wb\") as image_file:\n",
" with BytesIO() as img_byte_arr:\n",
" generated_image.save(img_byte_arr, format='PNG')\n",
" img_byte_arr.seek(0)\n",
" image_file.write(img_byte_arr.read())\n",
"\n",
" print(f\"{document_number} Done.\")\n"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Cannot initialize model with low cpu memory usage because `accelerate` was not found in the environment. Defaulting to `low_cpu_mem_usage=False`. It is strongly recommended to install `accelerate` for faster and less memory-intense model loading. You can do so with: \n",
"```\n",
"pip install accelerate\n",
"```\n",
".\n",
"Loading pipeline components...: 100%|██████████| 6/6 [00:02<00:00, 2.14it/s]\n",
"100%|██████████| 50/50 [00:53<00:00, 1.06s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"0 Done.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 50/50 [00:52<00:00, 1.06s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1 Done.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 50/50 [00:54<00:00, 1.10s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"2 Done.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 50/50 [01:01<00:00, 1.23s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"3 Done.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 50/50 [01:54<00:00, 2.29s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"4 Done.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 50/50 [00:55<00:00, 1.11s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"5 Done.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 50/50 [00:58<00:00, 1.17s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"6 Done.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 50/50 [01:09<00:00, 1.40s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"7 Done.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 50/50 [01:08<00:00, 1.36s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"8 Done.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 50/50 [01:04<00:00, 1.29s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"9 Done.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 50/50 [01:01<00:00, 1.23s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"10 Done.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 50/50 [01:01<00:00, 1.22s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"11 Done.\n"
]
}
],
"source": [
"pipe = create_pipe()\n",
"prompts = create_prompts()\n",
"document_number = 0\n",
"for prompt in prompts:\n",
" create_image(pipe, document_number,prompt)\n",
" document_number += 1"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment