Created
January 13, 2025 01:31
-
-
Save jamessdixon/5a70d8e5850c1dadc093ace2eb2147a5 to your computer and use it in GitHub Desktop.
create_dummy_documents
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Use the %pip magic so the packages are installed into the environment of the\n", | |
"# running kernel; a shell `!pip` may resolve to a different interpreter, which\n", | |
"# leaves the kernel without the packages (e.g. `accelerate` not being found).\n", | |
"%pip install transformers diffusers torch accelerate" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import io\n", | |
"import os\n", | |
"import requests\n", | |
"from diffusers import StableDiffusionPipeline\n", | |
"import torch\n", | |
"from PIL import Image\n", | |
"from io import BytesIO" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def create_prompts(document_types=None, characters=None):\n", | |
"    \"\"\"Build one text-to-image prompt per (document type, character) pair.\n", | |
"\n", | |
"    Args:\n", | |
"        document_types: Optional iterable of document descriptions. When None,\n", | |
"            the three sample documents listed below are used.\n", | |
"        characters: Optional iterable of character names. When None, the four\n", | |
"            sample characters listed below are used.\n", | |
"\n", | |
"    Returns:\n", | |
"        A list of prompt strings, grouped by document type and, within each\n", | |
"        group, ordered by character.\n", | |
"    \"\"\"\n", | |
"    if document_types is None:\n", | |
"        document_types = [\n", | |
"            'hawaii drivers license with character name, picture, and address',\n", | |
"            'english passport with character name, picture, and address',\n", | |
"            'cambridge university student id with character name, picture, and address',\n", | |
"        ]\n", | |
"    if characters is None:\n", | |
"        characters = ['King Arthur', 'Sir Lancelot', 'Sir Galahad', 'Sir Robin']\n", | |
"    # Materialize so a one-shot iterator can be replayed for every document type.\n", | |
"    characters = list(characters)\n", | |
"    return [\n", | |
"        f\"Create a {document_type} for {character}.\"\n", | |
"        for document_type in document_types\n", | |
"        for character in characters\n", | |
"    ]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def create_pipe(model_id=\"stabilityai/stable-diffusion-2\", device=None):\n", | |
"    \"\"\"Load a Stable Diffusion pipeline and move it to a compute device.\n", | |
"\n", | |
"    Args:\n", | |
"        model_id: Model identifier passed to `StableDiffusionPipeline.from_pretrained`.\n", | |
"        device: Target torch device string. When None, \"mps\" is used if it is\n", | |
"            available, then \"cuda\", otherwise \"cpu\".\n", | |
"\n", | |
"    Returns:\n", | |
"        The loaded pipeline after it has been moved to the selected device.\n", | |
"    \"\"\"\n", | |
"    if device is None:\n", | |
"        if torch.backends.mps.is_available():\n", | |
"            device = \"mps\"\n", | |
"        elif torch.cuda.is_available():\n", | |
"            device = \"cuda\"\n", | |
"        else:\n", | |
"            device = \"cpu\"\n", | |
"    # Half precision is kept on accelerators; on CPU fall back to float32 since\n", | |
"    # float16 inference is generally not supported (or is very slow) there.\n", | |
"    dtype = torch.float32 if str(device).startswith(\"cpu\") else torch.float16\n", | |
"    pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=dtype)\n", | |
"    pipe.to(device)\n", | |
"    return pipe" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def create_image(pipe, document_number, prompt):\n", | |
"    \"\"\"Generate one image for `prompt` and save it as a PNG under ./data.\n", | |
"\n", | |
"    Args:\n", | |
"        pipe: Callable pipeline; `pipe(prompt).images[0]` must provide a\n", | |
"            `save(file, format=...)` method.\n", | |
"        document_number: Converted with `str` and used as the file's base name.\n", | |
"        prompt: Text prompt handed to the pipeline.\n", | |
"    \"\"\"\n", | |
"    generated_image = pipe(prompt).images[0]\n", | |
"    image_dir = os.path.join(os.curdir, 'data')\n", | |
"    # Create the output directory on first use so a fresh run does not fail\n", | |
"    # with FileNotFoundError when ./data is missing.\n", | |
"    os.makedirs(image_dir, exist_ok=True)\n", | |
"    image_path = os.path.join(image_dir, str(document_number) + '.png')\n", | |
"\n", | |
"    # Write the PNG straight into the file; no intermediate in-memory buffer.\n", | |
"    with open(image_path, \"wb\") as image_file:\n", | |
"        generated_image.save(image_file, format='PNG')\n", | |
"\n", | |
"    print(f\"{document_number} Done.\")\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"Cannot initialize model with low cpu memory usage because `accelerate` was not found in the environment. Defaulting to `low_cpu_mem_usage=False`. It is strongly recommended to install `accelerate` for faster and less memory-intense model loading. You can do so with: \n", | |
"```\n", | |
"pip install accelerate\n", | |
"```\n", | |
".\n", | |
"Loading pipeline components...: 100%|██████████| 6/6 [00:02<00:00, 2.14it/s]\n", | |
"100%|██████████| 50/50 [00:53<00:00, 1.06s/it]\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"0 Done.\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"100%|██████████| 50/50 [00:52<00:00, 1.06s/it]\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"1 Done.\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"100%|██████████| 50/50 [00:54<00:00, 1.10s/it]\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"2 Done.\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"100%|██████████| 50/50 [01:01<00:00, 1.23s/it]\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"3 Done.\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"100%|██████████| 50/50 [01:54<00:00, 2.29s/it]\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"4 Done.\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"100%|██████████| 50/50 [00:55<00:00, 1.11s/it]\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"5 Done.\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"100%|██████████| 50/50 [00:58<00:00, 1.17s/it]\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"6 Done.\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"100%|██████████| 50/50 [01:09<00:00, 1.40s/it]\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"7 Done.\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"100%|██████████| 50/50 [01:08<00:00, 1.36s/it]\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"8 Done.\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"100%|██████████| 50/50 [01:04<00:00, 1.29s/it]\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"9 Done.\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"100%|██████████| 50/50 [01:01<00:00, 1.23s/it]\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"10 Done.\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"100%|██████████| 50/50 [01:01<00:00, 1.22s/it]\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"11 Done.\n" | |
] | |
} | |
], | |
"source": [ | |
"pipe = create_pipe()\n", | |
"prompts = create_prompts()\n", | |
"# enumerate supplies the running document number that names each output file.\n", | |
"for document_number, prompt in enumerate(prompts):\n", | |
"    create_image(pipe, document_number, prompt)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": ".venv", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.12.7" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment