@jamessdixon
Created January 24, 2025 05:34
image correction
{
"cells": [
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Processed image saved at data_new\\0_altered.png\n"
]
}
],
"source": [
"import os\n",
"from PIL import Image, ImageEnhance\n",
"\n",
"def process_image(image_path, output_path):\n",
" with Image.open(image_path) as img:\n",
" rotated_img = img.rotate(180)\n",
" enhancer = ImageEnhance.Brightness(rotated_img)\n",
" darkened_img = enhancer.enhance(0.5)\n",
" darkened_img.save(output_path)\n",
"\n",
"input_folder = 'data_new'\n",
"output_folder = 'data_new'\n",
"input_filename = '0.png'\n",
"output_filename = '0_altered.png'\n",
"\n",
"input_path = os.path.join(input_folder, input_filename)\n",
"output_path = os.path.join(output_folder, output_filename)\n",
"process_image(input_path, output_path)\n",
"\n",
"print(f\"Processed image saved at {output_path}\")"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Image: 0_altered.png\n",
"Predicted class: A driver's license\n",
"Confidence: 0.72\n",
"------------------------------\n"
]
}
],
"source": [
"import os\n",
"from transformers import CLIPProcessor, CLIPModel\n",
"from PIL import Image\n",
"\n",
"\n",
"model = CLIPModel.from_pretrained(\"openai/clip-vit-base-patch32\")\n",
"processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-base-patch32\")\n",
"descriptions = [\"A driver's license\", \"A passport\", \"A student ID\"]\n",
"\n",
"image_folder = \"data_new\"\n",
"filename = '0_altered.png'\n",
"image_path = os.path.join(image_folder, filename)\n",
"image = Image.open(image_path)\n",
"inputs = processor(text=descriptions, images=image, return_tensors=\"pt\", padding=True)\n",
"outputs = model(**inputs)\n",
"logits_per_image = outputs.logits_per_image \n",
"probs = logits_per_image.softmax(dim=1) \n",
"\n",
"predicted_class = descriptions[probs.argmax()]\n",
"confidence = probs.max().item()\n",
"\n",
"print(f\"Image: {filename}\")\n",
"print(f\"Predicted class: {predicted_class}\")\n",
"print(f\"Confidence: {confidence:.2f}\")\n",
"print(\"-\" * 30)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Best Rotation Increment: 180\n",
"Best Brightness Increment: 100\n",
"Best Confidence: 0.99\n"
]
}
],
"source": [
"import io\n",
"import os\n",
"from PIL import Image, ImageEnhance\n",
"\n",
"#Jamie - mention gradiant descent here - this is a simple implementaiton of a loss function\n",
"def pre_process_image(image_path, rotation_increment, brightness_increment):\n",
" with Image.open(image_path) as img: \n",
" rotated_img = img.rotate(rotation_increment)\n",
" enhancer = ImageEnhance.Brightness(rotated_img)\n",
" brightness = 1 + (brightness_increment * .01)\n",
" brightened_img = enhancer.enhance(brightness)\n",
" img_byte_arr = io.BytesIO()\n",
" brightened_img.save(img_byte_arr, format='JPEG')\n",
" img_byte_arr.seek(0)\n",
" return img_byte_arr\n",
"\n",
"\n",
"image_folder = 'data_new'\n",
"filename = '0_altered.png'\n",
"image_path = os.path.join(image_folder, filename)\n",
"\n",
"best_confidence = 0\n",
"best_rotation_increment = 0\n",
"best_brightness_increment = 0\n",
"\n",
"for rotation_increment in range(0, 181, 10):\n",
" for brightness_increment in range(0, 101, 10):\n",
" pre_processed_image = pre_process_image(image_path,rotation_increment,brightness_increment)\n",
" image = Image.open(pre_processed_image)\n",
" inputs = processor(text=descriptions, images=image, return_tensors=\"pt\", padding=True)\n",
" outputs = model(**inputs)\n",
" logits_per_image = outputs.logits_per_image \n",
" probs = logits_per_image.softmax(dim=1) \n",
" predicted_class = descriptions[probs.argmax()]\n",
" confidence = probs.max().item()\n",
" if confidence > best_confidence:\n",
" best_confidence = confidence\n",
" best_rotation_increment = rotation_increment\n",
" best_brightness_increment = brightness_increment\n",
"\n",
"print(f\"Best Rotation Increment: {best_rotation_increment}\")\n",
"print(f\"Best Brightness Increment: {best_brightness_increment}\")\n",
"print(f\"Best Confidence: {best_confidence:.2f}\")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Number of cores available: 22\n"
]
}
],
"source": [
"import os\n",
"from PIL import Image, ImageEnhance\n",
"import io\n",
"import multiprocessing\n",
"from multiprocessing import Pool\n",
"\n",
"print(f\"Number of cores available: {multiprocessing.cpu_count()}\")\n",
"\n",
"def pre_process_image(params):\n",
" image_path, rotation_increment, brightness_increment = params\n",
" with Image.open(image_path) as img:\n",
" rotated_img = img.rotate(rotation_increment)\n",
" enhancer = ImageEnhance.Brightness(rotated_img)\n",
" brightness = 1 + (brightness_increment * 0.01)\n",
" brightened_img = enhancer.enhance(brightness)\n",
" img_byte_arr = io.BytesIO()\n",
" brightened_img.save(img_byte_arr, format='JPEG')\n",
" img_byte_arr.seek(0)\n",
" return img_byte_arr, rotation_increment, brightness_increment\n",
"\n",
"def process_and_calculate_confidence(params):\n",
" pre_processed_image, rotation_increment, brightness_increment = pre_process_image(params)\n",
" image = Image.open(pre_processed_image)\n",
" inputs = processor(text=descriptions, images=image, return_tensors=\"pt\", padding=True)\n",
" outputs = model(**inputs)\n",
" logits_per_image = outputs.logits_per_image \n",
" probs = logits_per_image.softmax(dim=1)\n",
" predicted_class = descriptions[probs.argmax()]\n",
" confidence = probs.max().item()\n",
" return confidence, rotation_increment, brightness_increment\n",
"\n",
"image_folder = 'data_new'\n",
"filename = '0_altered.png'\n",
"image_path = os.path.join(image_folder, filename)\n",
"\n",
"best_confidence = 0\n",
"best_rotation_increment = 0\n",
"best_brightness_increment = 0\n",
"\n",
"params_list = [(image_path, rotation_increment, brightness_increment) \n",
" for rotation_increment in range(0, 181, 10) \n",
" for brightness_increment in range(0, 101, 10)]\n",
"\n",
"with Pool() as pool:\n",
" results = pool.map(process_and_calculate_confidence, params_list)\n",
"\n",
"for confidence, rotation_increment, brightness_increment in results:\n",
" if confidence > best_confidence:\n",
" best_confidence = confidence\n",
" best_rotation_increment = rotation_increment\n",
" best_brightness_increment = brightness_increment\n",
"\n",
"print(f\"Best Rotation Increment: {best_rotation_increment}\")\n",
"print(f\"Best Brightness Increment: {best_brightness_increment}\")\n",
"print(f\"Best Confidence: {best_confidence:.2f}\")\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"ename": "AssertionError",
"evalue": "Torch not compiled with CUDA enabled",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mAssertionError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[1], line 45\u001b[0m\n\u001b[0;32m 43\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m rotation_increment \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(\u001b[38;5;241m0\u001b[39m, \u001b[38;5;241m181\u001b[39m, \u001b[38;5;241m10\u001b[39m):\n\u001b[0;32m 44\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m brightness_increment \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(\u001b[38;5;241m0\u001b[39m, \u001b[38;5;241m101\u001b[39m, \u001b[38;5;241m10\u001b[39m):\n\u001b[1;32m---> 45\u001b[0m confidence, rotation_increment, brightness_increment \u001b[38;5;241m=\u001b[39m \u001b[43mprocess_and_calculate_confidence\u001b[49m\u001b[43m(\u001b[49m\u001b[43mimage_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrotation_increment\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbrightness_increment\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 47\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m confidence \u001b[38;5;241m>\u001b[39m best_confidence:\n\u001b[0;32m 48\u001b[0m best_confidence \u001b[38;5;241m=\u001b[39m confidence\n",
"Cell \u001b[1;32mIn[1], line 23\u001b[0m, in \u001b[0;36mprocess_and_calculate_confidence\u001b[1;34m(image_path, rotation_increment, brightness_increment)\u001b[0m\n\u001b[0;32m 21\u001b[0m image \u001b[38;5;241m=\u001b[39m Image\u001b[38;5;241m.\u001b[39mopen(pre_processed_image)\n\u001b[0;32m 22\u001b[0m transform \u001b[38;5;241m=\u001b[39m transforms\u001b[38;5;241m.\u001b[39mToTensor()\n\u001b[1;32m---> 23\u001b[0m image_tensor \u001b[38;5;241m=\u001b[39m \u001b[43mtransform\u001b[49m\u001b[43m(\u001b[49m\u001b[43mimage\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43munsqueeze\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcuda\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 25\u001b[0m inputs \u001b[38;5;241m=\u001b[39m processor(text\u001b[38;5;241m=\u001b[39mdescriptions, images\u001b[38;5;241m=\u001b[39mimage_tensor, return_tensors\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpt\u001b[39m\u001b[38;5;124m\"\u001b[39m, padding\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\u001b[38;5;241m.\u001b[39mto(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcuda\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m 26\u001b[0m outputs \u001b[38;5;241m=\u001b[39m model(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39minputs)\n",
"File \u001b[1;32mc:\\Users\\dixonjames\\Documents\\trained\\.venv\\Lib\\site-packages\\torch\\cuda\\__init__.py:310\u001b[0m, in \u001b[0;36m_lazy_init\u001b[1;34m()\u001b[0m\n\u001b[0;32m 305\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\n\u001b[0;32m 306\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCannot re-initialize CUDA in forked subprocess. To use CUDA with \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 307\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmultiprocessing, you must use the \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mspawn\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m start method\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 308\u001b[0m )\n\u001b[0;32m 309\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(torch\u001b[38;5;241m.\u001b[39m_C, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_cuda_getDeviceCount\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[1;32m--> 310\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mAssertionError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mTorch not compiled with CUDA enabled\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 311\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m _cudart \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 312\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mAssertionError\u001b[39;00m(\n\u001b[0;32m 313\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlibcudart functions unavailable. It looks like you have a broken build?\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 314\u001b[0m )\n",
"\u001b[1;31mAssertionError\u001b[0m: Torch not compiled with CUDA enabled"
]
}
],
"source": [
"import os\n",
"from PIL import Image, ImageEnhance\n",
"import io\n",
"import torch\n",
"from torch import nn\n",
"from torchvision import transforms\n",
"\n",
"def pre_process_image(image_path, rotation_increment, brightness_increment):\n",
" with Image.open(image_path) as img:\n",
" rotated_img = img.rotate(rotation_increment)\n",
" enhancer = ImageEnhance.Brightness(rotated_img)\n",
" brightness = 1 + (brightness_increment * 0.01)\n",
" brightened_img = enhancer.enhance(brightness)\n",
" img_byte_arr = io.BytesIO()\n",
" brightened_img.save(img_byte_arr, format='JPEG')\n",
" img_byte_arr.seek(0)\n",
" return img_byte_arr\n",
"\n",
"def process_and_calculate_confidence(image_path, rotation_increment, brightness_increment):\n",
" pre_processed_image = pre_process_image(image_path, rotation_increment, brightness_increment)\n",
" image = Image.open(pre_processed_image)\n",
" transform = transforms.ToTensor()\n",
" image_tensor = transform(image).unsqueeze(0).cuda()\n",
" \n",
" inputs = processor(text=descriptions, images=image_tensor, return_tensors=\"pt\", padding=True).to('cuda')\n",
" outputs = model(**inputs)\n",
" logits_per_image = outputs.logits_per_image \n",
" probs = logits_per_image.softmax(dim=1)\n",
"\n",
" predicted_class = descriptions[probs.argmax()]\n",
" confidence = probs.max().item()\n",
" \n",
" return confidence, rotation_increment, brightness_increment\n",
"\n",
"image_folder = 'data_new'\n",
"filename = '0_altered.png'\n",
"image_path = os.path.join(image_folder, filename)\n",
"\n",
"best_confidence = 0\n",
"best_rotation_increment = 0\n",
"best_brightness_increment = 0\n",
"\n",
"for rotation_increment in range(0, 181, 10):\n",
" for brightness_increment in range(0, 101, 10):\n",
" confidence, rotation_increment, brightness_increment = process_and_calculate_confidence(image_path, rotation_increment, brightness_increment)\n",
" \n",
" if confidence > best_confidence:\n",
" best_confidence = confidence\n",
" best_rotation_increment = rotation_increment\n",
" best_brightness_increment = brightness_increment\n",
"\n",
"print(f\"Best Rotation Increment: {best_rotation_increment}\")\n",
"print(f\"Best Brightness Increment: {best_brightness_increment}\")\n",
"print(f\"Best Confidence: {best_confidence:.2f}\")\n",
"\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}