Created
January 24, 2025 05:34
-
-
Save jamessdixon/19c71fab19e7e7902cfe8312ad7e860e to your computer and use it in GitHub Desktop.
image correction
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Processed image saved at data_new\\0_altered.png\n" | |
] | |
} | |
], | |
"source": [ | |
"import os\n",
"from PIL import Image, ImageEnhance\n",
"\n",
"def process_image(image_path, output_path):\n",
"    \"\"\"Rotate the image at image_path by 180 degrees, halve its brightness, and save it to output_path.\"\"\"\n",
"    with Image.open(image_path) as img:\n",
"        flipped = img.rotate(180)\n",
"        darkened = ImageEnhance.Brightness(flipped).enhance(0.5)\n",
"        darkened.save(output_path)\n",
"\n",
"# Input and output live in the same folder; only the filename changes.\n",
"input_folder = 'data_new'\n",
"output_folder = 'data_new'\n",
"input_filename = '0.png'\n",
"output_filename = '0_altered.png'\n",
"\n",
"input_path = os.path.join(input_folder, input_filename)\n",
"output_path = os.path.join(output_folder, output_filename)\n",
"process_image(input_path, output_path)\n",
"\n",
"print(f\"Processed image saved at {output_path}\")"
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Image: 0_altered.png\n", | |
"Predicted class: A driver's license\n", | |
"Confidence: 0.72\n", | |
"------------------------------\n" | |
] | |
} | |
], | |
"source": [ | |
"import os\n",
"from transformers import CLIPProcessor, CLIPModel\n",
"from PIL import Image\n",
"\n",
"\n",
"# Zero-shot classification with CLIP: score one image against each text description.\n",
"model = CLIPModel.from_pretrained(\"openai/clip-vit-base-patch32\")\n",
"processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-base-patch32\")\n",
"descriptions = [\"A driver's license\", \"A passport\", \"A student ID\"]\n",
"\n",
"image_folder = \"data_new\"\n",
"filename = '0_altered.png'\n",
"image_path = os.path.join(image_folder, filename)\n",
"image = Image.open(image_path)\n",
"inputs = processor(text=descriptions, images=image, return_tensors=\"pt\", padding=True)\n",
"outputs = model(**inputs)\n",
"logits_per_image = outputs.logits_per_image  # shape: (num_images, num_descriptions)\n",
"probs = logits_per_image.softmax(dim=1)  # normalize logits into per-description probabilities\n",
"\n",
"predicted_class = descriptions[probs.argmax()]\n",
"confidence = probs.max().item()\n",
"\n",
"# Fix: the f-string had no placeholder and printed the literal text '(unknown)';\n",
"# the recorded output above shows the filename is what was intended.\n",
"print(f\"Image: {filename}\")\n",
"print(f\"Predicted class: {predicted_class}\")\n",
"print(f\"Confidence: {confidence:.2f}\")\n",
"print(\"-\" * 30)"
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 23, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Best Rotation Increment: 180\n", | |
"Best Brightness Increment: 100\n", | |
"Best Confidence: 0.99\n" | |
] | |
} | |
], | |
"source": [ | |
"import io\n",
"import os\n",
"from PIL import Image, ImageEnhance\n",
"\n",
"# Grid search over rotation/brightness — a brute-force stand-in for gradient\n",
"# descent on a loss function: find the transform that maximizes CLIP confidence.\n",
"def pre_process_image(image_path, rotation_increment, brightness_increment):\n",
"    \"\"\"Rotate and brighten the image, returning the result as an in-memory JPEG buffer.\n",
"\n",
"    brightness_increment is a percentage: 0 leaves brightness unchanged, 100 doubles it.\n",
"    \"\"\"\n",
"    with Image.open(image_path) as img:\n",
"        rotated_img = img.rotate(rotation_increment)\n",
"        enhancer = ImageEnhance.Brightness(rotated_img)\n",
"        brightness = 1 + (brightness_increment * .01)\n",
"        brightened_img = enhancer.enhance(brightness)\n",
"        img_byte_arr = io.BytesIO()\n",
"        # JPEG cannot store an alpha channel; PNG sources are often RGBA, so\n",
"        # flatten to RGB first — otherwise save() raises OSError for RGBA input.\n",
"        brightened_img.convert('RGB').save(img_byte_arr, format='JPEG')\n",
"        img_byte_arr.seek(0)\n",
"        return img_byte_arr\n",
"\n",
"\n",
"image_folder = 'data_new'\n",
"filename = '0_altered.png'\n",
"image_path = os.path.join(image_folder, filename)\n",
"\n",
"best_confidence = 0\n",
"best_rotation_increment = 0\n",
"best_brightness_increment = 0\n",
"\n",
"# NOTE: model, processor and descriptions are defined in the previous cell.\n",
"for rotation_increment in range(0, 181, 10):\n",
"    for brightness_increment in range(0, 101, 10):\n",
"        pre_processed_image = pre_process_image(image_path, rotation_increment, brightness_increment)\n",
"        image = Image.open(pre_processed_image)\n",
"        inputs = processor(text=descriptions, images=image, return_tensors=\"pt\", padding=True)\n",
"        outputs = model(**inputs)\n",
"        logits_per_image = outputs.logits_per_image\n",
"        probs = logits_per_image.softmax(dim=1)\n",
"        confidence = probs.max().item()\n",
"        if confidence > best_confidence:\n",
"            best_confidence = confidence\n",
"            best_rotation_increment = rotation_increment\n",
"            best_brightness_increment = brightness_increment\n",
"\n",
"print(f\"Best Rotation Increment: {best_rotation_increment}\")\n",
"print(f\"Best Brightness Increment: {best_brightness_increment}\")\n",
"print(f\"Best Confidence: {best_confidence:.2f}\")\n"
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Number of cores available: 22\n" | |
] | |
} | |
], | |
"source": [ | |
"import os\n",
"from PIL import Image, ImageEnhance\n",
"import io\n",
"import multiprocessing\n",
"from multiprocessing.pool import ThreadPool\n",
"\n",
"print(f\"Number of cores available: {multiprocessing.cpu_count()}\")\n",
"\n",
"def pre_process_image(params):\n",
"    \"\"\"Rotate and brighten the image; return (jpeg_buffer, rotation, brightness).\"\"\"\n",
"    image_path, rotation_increment, brightness_increment = params\n",
"    with Image.open(image_path) as img:\n",
"        rotated_img = img.rotate(rotation_increment)\n",
"        enhancer = ImageEnhance.Brightness(rotated_img)\n",
"        brightness = 1 + (brightness_increment * 0.01)\n",
"        brightened_img = enhancer.enhance(brightness)\n",
"        img_byte_arr = io.BytesIO()\n",
"        # JPEG has no alpha channel, so flatten RGBA sources to RGB before saving.\n",
"        brightened_img.convert('RGB').save(img_byte_arr, format='JPEG')\n",
"        img_byte_arr.seek(0)\n",
"        return img_byte_arr, rotation_increment, brightness_increment\n",
"\n",
"def process_and_calculate_confidence(params):\n",
"    \"\"\"Score one (path, rotation, brightness) combination with CLIP.\n",
"\n",
"    Relies on model, processor and descriptions defined in an earlier cell.\n",
"    \"\"\"\n",
"    pre_processed_image, rotation_increment, brightness_increment = pre_process_image(params)\n",
"    image = Image.open(pre_processed_image)\n",
"    inputs = processor(text=descriptions, images=image, return_tensors=\"pt\", padding=True)\n",
"    outputs = model(**inputs)\n",
"    logits_per_image = outputs.logits_per_image\n",
"    probs = logits_per_image.softmax(dim=1)\n",
"    confidence = probs.max().item()\n",
"    return confidence, rotation_increment, brightness_increment\n",
"\n",
"image_folder = 'data_new'\n",
"filename = '0_altered.png'\n",
"image_path = os.path.join(image_folder, filename)\n",
"\n",
"best_confidence = 0\n",
"best_rotation_increment = 0\n",
"best_brightness_increment = 0\n",
"\n",
"params_list = [(image_path, rotation_increment, brightness_increment)\n",
"               for rotation_increment in range(0, 181, 10)\n",
"               for brightness_increment in range(0, 101, 10)]\n",
"\n",
"# Use threads, not processes: a process Pool on Windows uses the 'spawn' start\n",
"# method, whose fresh interpreters cannot pickle functions defined in notebook\n",
"# cells and never see the notebook-level model/processor/descriptions globals.\n",
"# Threads share the interpreter, so no pickling is needed and the CLIP model\n",
"# is loaded only once.\n",
"with ThreadPool() as pool:\n",
"    results = pool.map(process_and_calculate_confidence, params_list)\n",
"\n",
"for confidence, rotation_increment, brightness_increment in results:\n",
"    if confidence > best_confidence:\n",
"        best_confidence = confidence\n",
"        best_rotation_increment = rotation_increment\n",
"        best_brightness_increment = brightness_increment\n",
"\n",
"print(f\"Best Rotation Increment: {best_rotation_increment}\")\n",
"print(f\"Best Brightness Increment: {best_brightness_increment}\")\n",
"print(f\"Best Confidence: {best_confidence:.2f}\")\n"
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"ename": "AssertionError", | |
"evalue": "Torch not compiled with CUDA enabled", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[1;31mAssertionError\u001b[0m Traceback (most recent call last)", | |
"Cell \u001b[1;32mIn[1], line 45\u001b[0m\n\u001b[0;32m 43\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m rotation_increment \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(\u001b[38;5;241m0\u001b[39m, \u001b[38;5;241m181\u001b[39m, \u001b[38;5;241m10\u001b[39m):\n\u001b[0;32m 44\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m brightness_increment \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(\u001b[38;5;241m0\u001b[39m, \u001b[38;5;241m101\u001b[39m, \u001b[38;5;241m10\u001b[39m):\n\u001b[1;32m---> 45\u001b[0m confidence, rotation_increment, brightness_increment \u001b[38;5;241m=\u001b[39m \u001b[43mprocess_and_calculate_confidence\u001b[49m\u001b[43m(\u001b[49m\u001b[43mimage_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrotation_increment\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbrightness_increment\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 47\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m confidence \u001b[38;5;241m>\u001b[39m best_confidence:\n\u001b[0;32m 48\u001b[0m best_confidence \u001b[38;5;241m=\u001b[39m confidence\n", | |
"Cell \u001b[1;32mIn[1], line 23\u001b[0m, in \u001b[0;36mprocess_and_calculate_confidence\u001b[1;34m(image_path, rotation_increment, brightness_increment)\u001b[0m\n\u001b[0;32m 21\u001b[0m image \u001b[38;5;241m=\u001b[39m Image\u001b[38;5;241m.\u001b[39mopen(pre_processed_image)\n\u001b[0;32m 22\u001b[0m transform \u001b[38;5;241m=\u001b[39m transforms\u001b[38;5;241m.\u001b[39mToTensor()\n\u001b[1;32m---> 23\u001b[0m image_tensor \u001b[38;5;241m=\u001b[39m \u001b[43mtransform\u001b[49m\u001b[43m(\u001b[49m\u001b[43mimage\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43munsqueeze\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcuda\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 25\u001b[0m inputs \u001b[38;5;241m=\u001b[39m processor(text\u001b[38;5;241m=\u001b[39mdescriptions, images\u001b[38;5;241m=\u001b[39mimage_tensor, return_tensors\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpt\u001b[39m\u001b[38;5;124m\"\u001b[39m, padding\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\u001b[38;5;241m.\u001b[39mto(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcuda\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m 26\u001b[0m outputs \u001b[38;5;241m=\u001b[39m model(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39minputs)\n", | |
"File \u001b[1;32mc:\\Users\\dixonjames\\Documents\\trained\\.venv\\Lib\\site-packages\\torch\\cuda\\__init__.py:310\u001b[0m, in \u001b[0;36m_lazy_init\u001b[1;34m()\u001b[0m\n\u001b[0;32m 305\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\n\u001b[0;32m 306\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCannot re-initialize CUDA in forked subprocess. To use CUDA with \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 307\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmultiprocessing, you must use the \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mspawn\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m start method\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 308\u001b[0m )\n\u001b[0;32m 309\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(torch\u001b[38;5;241m.\u001b[39m_C, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_cuda_getDeviceCount\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[1;32m--> 310\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mAssertionError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mTorch not compiled with CUDA enabled\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 311\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m _cudart \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 312\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mAssertionError\u001b[39;00m(\n\u001b[0;32m 313\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlibcudart functions unavailable. It looks like you have a broken build?\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 314\u001b[0m )\n", | |
"\u001b[1;31mAssertionError\u001b[0m: Torch not compiled with CUDA enabled" | |
] | |
} | |
], | |
"source": [ | |
"import os\n",
"from PIL import Image, ImageEnhance\n",
"import io\n",
"import torch\n",
"\n",
"# Fall back to CPU when CUDA is unavailable: the hard-coded .cuda() call\n",
"# raised 'AssertionError: Torch not compiled with CUDA enabled' on this machine.\n",
"device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
"model = model.to(device)  # model/processor/descriptions come from an earlier cell\n",
"\n",
"def pre_process_image(image_path, rotation_increment, brightness_increment):\n",
"    \"\"\"Rotate and brighten the image, returning the result as an in-memory JPEG buffer.\"\"\"\n",
"    with Image.open(image_path) as img:\n",
"        rotated_img = img.rotate(rotation_increment)\n",
"        enhancer = ImageEnhance.Brightness(rotated_img)\n",
"        brightness = 1 + (brightness_increment * 0.01)\n",
"        brightened_img = enhancer.enhance(brightness)\n",
"        img_byte_arr = io.BytesIO()\n",
"        # JPEG has no alpha channel, so flatten RGBA sources to RGB before saving.\n",
"        brightened_img.convert('RGB').save(img_byte_arr, format='JPEG')\n",
"        img_byte_arr.seek(0)\n",
"        return img_byte_arr\n",
"\n",
"def process_and_calculate_confidence(image_path, rotation_increment, brightness_increment):\n",
"    \"\"\"Score one rotation/brightness combination with CLIP on the selected device.\"\"\"\n",
"    pre_processed_image = pre_process_image(image_path, rotation_increment, brightness_increment)\n",
"    image = Image.open(pre_processed_image)\n",
"    # Hand the PIL image straight to the CLIP processor: it does its own\n",
"    # resizing/normalization, so a manual ToTensor() pass would preprocess twice.\n",
"    inputs = processor(text=descriptions, images=image, return_tensors=\"pt\", padding=True).to(device)\n",
"    with torch.no_grad():  # inference only — skip building the autograd graph\n",
"        outputs = model(**inputs)\n",
"    logits_per_image = outputs.logits_per_image\n",
"    probs = logits_per_image.softmax(dim=1)\n",
"    confidence = probs.max().item()\n",
"    return confidence, rotation_increment, brightness_increment\n",
"\n",
"image_folder = 'data_new'\n",
"filename = '0_altered.png'\n",
"image_path = os.path.join(image_folder, filename)\n",
"\n",
"best_confidence = 0\n",
"best_rotation_increment = 0\n",
"best_brightness_increment = 0\n",
"\n",
"for rotation_increment in range(0, 181, 10):\n",
"    for brightness_increment in range(0, 101, 10):\n",
"        # Don't unpack into the loop variables themselves — shadowing them\n",
"        # made the original loop's control flow needlessly fragile.\n",
"        confidence, _, _ = process_and_calculate_confidence(image_path, rotation_increment, brightness_increment)\n",
"        if confidence > best_confidence:\n",
"            best_confidence = confidence\n",
"            best_rotation_increment = rotation_increment\n",
"            best_brightness_increment = brightness_increment\n",
"\n",
"print(f\"Best Rotation Increment: {best_rotation_increment}\")\n",
"print(f\"Best Brightness Increment: {best_brightness_increment}\")\n",
"print(f\"Best Confidence: {best_confidence:.2f}\")\n"
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": ".venv", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.11.9" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment