# Get the distance and orientation of a QR code and plot a little XYZ axis gizmo in red/green/blue on top of the image.
# This gets really nice low latency across a local wifi network, but the QR code detection can be a bit jumpy sometimes.
# We try both the zbar and opencv libraries for QR code detection.
#
# This software is distributed under the "CC0 1.0 Universal (CC0 1.0)" license.
# You should have received a copy of the CC0 Public Domain Dedication along with this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
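#
# Usage sketch (the file name and host address below are placeholders, not part of the original):
#   python3 qr_pose_stream.py
# then open http://<pi-ip>:5001/ in a browser to view the annotated live stream.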
import datetime
import math
import numpy as np
import os
import threading
import time
import zbar
from picamera2 import Picamera2
import libcamera
from flask import Flask, render_template, Response
import cv2
def get_qr_code_corner_points(array: np.ndarray):
    """
    Detect the corner points of QR codes in a uint8 NumPy array using zbar.
    The array can be grayscale (H x W) or RGB (H x W x 3).

    Returns a tuple (qr_corner_points, qr_code_data): a list of lists, where each
    inner list contains tuples of (x, y) coordinates for the corners of one detected
    QR code, and the decoded data of the last detected code (or None if none was found).
    """
    # Ensure we have a 2D grayscale image
    if len(array.shape) == 3 and array.shape[2] == 3:
        # Convert from RGB to grayscale
        gray = (0.299 * array[:, :, 0] +
                0.587 * array[:, :, 1] +
                0.114 * array[:, :, 2]).astype(np.uint8)
    else:
        gray = array

    height, width = gray.shape
    raw_data = gray.tobytes()

    # Initialize a zbar image scanner
    scanner = zbar.ImageScanner()
    scanner.parse_config('enable')

    # Create a zbar Image
    zbar_image = zbar.Image(width, height, 'Y800', raw_data)

    # Scan for symbols (QR codes, etc.)
    scanner.scan(zbar_image)

    # List to store the corner points of all QR codes found
    qr_corner_points = []
    qr_code_data = None
    for symbol in zbar_image:
        # The location attribute contains the corner points of the detected code
        corner_points = symbol.location
        qr_corner_points.append(corner_points)
        qr_code_data = symbol.data

    del zbar_image
    return qr_corner_points, qr_code_data
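# Minimal usage sketch for the zbar-based helper above (the image file name is a
# placeholder; any image containing a QR code will do):
#
#   img_bgr = cv2.imread("qr_test.png")                             # OpenCV loads images as BGR
#   corners, data = get_qr_code_corner_points(img_bgr[..., ::-1])   # pass RGB (or grayscale)
#   if corners:
#       print("Corners of first code:", corners[0], "decoded data:", data)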
def reconstruct_pose_from_frame(imgInput, points):
    """
    camera matrix:
        [ fx  0  cx ]
        [  0  fy cy ]
        [  0  0   1 ]

    • The intrinsic camera parameters (fx, fy) – focal lengths in terms of pixel dimensions along the x and y axes.
    • The principal point (cx, cy) – usually near the center of the image.
    """
    height, width = imgInput.shape[:2]
    cx = width / 2.0
    cy = height / 2.0
    # Rough, uncalibrated approximation: use the image width as the focal length in pixels
    focal_length = imgInput.shape[1]
    camera_matrix = np.array([[focal_length, 0,            cx],
                              [0,            focal_length, cy],
                              [0,            0,            1]], dtype=np.float32)
    dist_coeffs = np.zeros((4, 1), dtype=np.float32)

    # 3D model points for the QR code corners (centered at (0,0))
    object_points = np.array([
        [-0.5, -0.5, 0],
        [ 0.5, -0.5, 0],
        [ 0.5,  0.5, 0],
        [-0.5,  0.5, 0]
    ], dtype=np.float32)

    # Solve for the pose (rvec, tvec)
    # points[0] has shape (4,2): the four detected corners
    retval, rvec, tvec = cv2.solvePnP(object_points, points[0], camera_matrix, dist_coeffs)

    # Define a small axis in 3D to project (XYZ)
    axis_3d_points = np.float32([
        [0, 0, 0],
        [0.5, 0, 0],
        [0, 0.5, 0],
        [0, 0, -0.5]
    ])

    # Project 3D axis onto the 2D image
    imgpts, _ = cv2.projectPoints(axis_3d_points, rvec, tvec, camera_matrix, dist_coeffs)
    center = tuple(imgpts[0].ravel().astype(int))
    x_axis = tuple(imgpts[1].ravel().astype(int))
    y_axis = tuple(imgpts[2].ravel().astype(int))
    z_axis = tuple(imgpts[3].ravel().astype(int))

    # Draw the axes: red (X), green (Y), blue (Z)
    cv2.line(imgInput, center, x_axis, (0, 0, 255), 5)
    cv2.line(imgInput, center, y_axis, (0, 255, 0), 5)
    cv2.line(imgInput, center, z_axis, (255, 0, 0), 5)
    return imgInput
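# A worked example of the intrinsics assumed above (illustrative numbers, not a
# calibrated camera matrix): for a 640 x 360 frame, fx = fy = 640 px and
# (cx, cy) = (320, 180), i.e.
#
#   camera_matrix = [[640,   0, 320],
#                    [  0, 640, 180],
#                    [  0,   0,   1]]
#
# Because the 3D model corners above span one unit per side (-0.5 to +0.5), the tvec
# returned by solvePnP is expressed in multiples of the QR code's physical side length.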
def find_qr_code(imgInput):
    global opencv_qr_detector

    points, qr_code_data = get_qr_code_corner_points(imgInput)
    ret_qr = len(points) > 0
    if ret_qr:
        print("\tQR code detected (zbar)!")
        points = np.array(points)
        cv2.polylines(imgInput, [points.astype(int)], isClosed=True, color=(64, 128, 255), thickness=5)
        imgInput = reconstruct_pose_from_frame(imgInput, points.astype(np.float32))
        imgInput = qr_code_distance_estimation(imgInput, points.astype(np.float32))

    ret_qr, points = opencv_qr_detector.detect(imgInput)
    if ret_qr:
        print("\tQR code detected (opencv)!")
        cv2.polylines(imgInput, [points[0].astype(int)], isClosed=True, color=(128, 255, 0), thickness=5)
        # Keep any data already decoded by zbar if OpenCV's decode comes back empty
        decoded, _ = opencv_qr_detector.decode(imgInput, points)
        if decoded:
            qr_code_data = decoded
        imgInput = reconstruct_pose_from_frame(imgInput, points.astype(np.float32))
        imgInput = qr_code_distance_estimation(imgInput, points.astype(np.float32))

    if qr_code_data:
        height, width = imgInput.shape[:2]
        y = height - 10
        s = "Data: " + qr_code_data
        cv2.putText(imgInput, s, (10, y), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 6)
        cv2.putText(imgInput, s, (10, y), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    return imgInput
def qr_code_distance_estimation(imgInput, corners, qr_size_mm=50.):
    distance_mm = None
    if len(corners) == 1:
        corners = corners[0]
        assert len(corners) == 4

        # Calculate the perimeter of the QR code in pixels
        perimeter_px = 0
        for i in range(4):
            p1 = corners[i]
            p2 = corners[(i + 1) % 4]
            perimeter_px += np.sqrt((p2[0] - p1[0])**2 + (p2[1] - p1[1])**2)

        # Average side length in pixels
        side_length_px = perimeter_px / 4

        # Calculate focal length (you would typically calibrate this).
        # For simplicity, we'll use a formula based on the camera's field of view,
        # assuming a 60-degree horizontal field of view for a typical webcam.
        image_width = imgInput.shape[1]
        focal_length_px = image_width / (2 * math.tan(math.radians(60) / 2))

        # Calculate distance using similar triangles
        distance_mm = (qr_size_mm * focal_length_px) / side_length_px

    if distance_mm is not None:
        distance_text = f"Distance: {distance_mm:.1f} mm ({distance_mm/1000:.2f} m)"
        cv2.putText(imgInput, distance_text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 6)
        cv2.putText(imgInput, distance_text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

    return imgInput
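# Worked example of the similar-triangles estimate above (illustrative numbers, not
# measurements): with a 640 px wide frame and the assumed 60-degree horizontal field
# of view, focal_length_px = 640 / (2 * tan(30 deg)) ~= 554 px. A 50 mm QR code whose
# average side appears 100 px long would then be estimated at
# distance ~= 50 * 554 / 100 ~= 277 mm from the camera.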
app = Flask(__name__, template_folder='/home/pi/adeept_darkpaw/server')
picam2 = None
opencv_qr_detector = cv2.QRCodeDetector()


# Note: @app.before_first_request was removed in newer Flask releases (2.3+); on those
# versions, call initialize() once before app.run() instead.
@app.before_first_request
def initialize():
    global picam2
    # Set up and start the Raspberry Pi camera
    picam2 = Picamera2()
    print("Available sensor modes (resolutions):")
    # Each mode is a dict with a "size" key among others.
    for mode in picam2.sensor_modes:
        print("\t" + str(mode.get("size")))
    picam2.configure(picam2.create_still_configuration(main={"size": (640, 360)},
                                                       transform=libcamera.Transform(vflip=True)))
    time.sleep(1)
    picam2.start()
    time.sleep(1)
def gen_frames():
    global picam2
    while True:
        print("Capturing frame...")
        frame = picam2.capture_array()
        print(frame.dtype)
        print(frame.shape)

        #frame = cv2.flip(frame, 0)  # flip up-down
        frame = find_qr_code(frame)
        frame = frame[..., [2, 1, 0]]  # swap B and R channels
        ret, buffer = cv2.imencode('.jpg', frame)
        frame = buffer.tobytes()

        # Concatenate frame with HTTP multipart headers
        yield (b'--frame\r\n'
               b'Content-Type: image/jpeg\r\n\r\n' + frame + b'\r\n')
@app.route('/')
def index():
    # Render an HTML template that displays the video stream
    return render_template('index.html')


@app.route('/video_feed')
def video_feed():
    # Return the response generated along with the specific media type (mime type)
    return Response(gen_frames(), mimetype='multipart/x-mixed-replace; boundary=frame')
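# The multipart/x-mixed-replace response keeps a single HTTP connection open and
# replaces the displayed JPEG each time gen_frames() yields a new "--frame" part,
# which is what gives the low-latency MJPEG stream mentioned at the top. In browsers
# that support MJPEG, the feed can also be viewed directly at /video_feed without the
# HTML page.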
if __name__ == '__main__':
    # Run the app on all interfaces so it is accessible from your network
    app.run(host='0.0.0.0', port=5001, debug=True)
<!-- index.html: the template rendered by the Flask app above -->
<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <title>Picamera2 Stream</title>
</head>
<body>
    <h1>Live Camera Feed</h1>
    <img src="{{ url_for('video_feed') }}" alt="Camera Feed">
</body>
</html>