# Get the distance and orientation of a QR code and plot a little XYZ axis gizmo in red/green/blue on top of the image.
# Latency across a local Wi-Fi network is nice and low, but the QR code detection can be a bit jumpy sometimes.
# We try both the zbar and opencv libraries for QR code detection.
#
# This software is distributed under the "CC0 1.0 Universal (CC0 1.0)" license.
# You should have received a copy of the CC0 Public Domain Dedication along with this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
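#
# Dependencies (package names are indicative and installation may differ per system):
# numpy, opencv-python (cv2), flask, picamera2, libcamera, and the Python zbar bindings.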
import datetime
import math
import numpy as np
import os
import threading
import time
import zbar
from picamera2 import Picamera2
import libcamera
from flask import Flask, render_template, Response
import cv2


def get_qr_code_corner_points(array: np.ndarray):
    """
    Detect the corner points of QR codes in a uint8 NumPy array using zbar.
    The array can be grayscale (H x W) or RGB (H x W x 3).
    Returns a tuple (corner points, decoded data): the corner points are a list
    of lists, where each inner list contains tuples of (x, y) coordinates for
    the corners of one detected QR code; the decoded data is that of the last
    detected code (or None if nothing was found).
    """
    # Ensure we have a 2D grayscale image
    if len(array.shape) == 3 and array.shape[2] == 3:
        # Convert from RGB to grayscale (ITU-R BT.601 luma weights)
        gray = (0.299 * array[:, :, 0]
                + 0.587 * array[:, :, 1]
                + 0.114 * array[:, :, 2]).astype(np.uint8)
    else:
        gray = array

    height, width = gray.shape
    raw_data = gray.tobytes()

    # Initialize a zbar image scanner
    scanner = zbar.ImageScanner()
    scanner.parse_config('enable')

    # Create a zbar Image ('Y800' is 8-bit grayscale)
    zbar_image = zbar.Image(width, height, 'Y800', raw_data)

    # Scan for symbols (QR codes, etc.)
    scanner.scan(zbar_image)

    # Collect the corner points of all QR codes found, plus the decoded data
    qr_corner_points = []
    qr_code_data = None
    for symbol in zbar_image:
        # The location attribute contains the corner points of the detected code
        corner_points = symbol.location
        qr_corner_points.append(corner_points)
        qr_code_data = symbol.data

    del zbar_image

    return qr_corner_points, qr_code_data
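

# Minimal usage sketch for get_qr_code_corner_points() (the image filename is
# purely illustrative):
#
#   img = cv2.imread("qr_test.png", cv2.IMREAD_GRAYSCALE)
#   corners, data = get_qr_code_corner_points(img)
#   if corners:
#       print("First QR code corners:", corners[0], "decoded data:", data)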


def reconstruct_pose_from_frame(imgInput, points):
    """
    Estimate the pose of the QR code and draw an XYZ axis gizmo onto the image.

    camera matrix:
        [ fx  0  cx ]
        [  0 fy  cy ]
        [  0  0   1 ]

    • The intrinsic camera parameters (fx, fy) – focal lengths in terms of pixel dimensions along the x and y axes.
    • The principal point (cx, cy) – usually near the center of the image.
    """
    height, width = imgInput.shape[:2]
    cx = width / 2.0
    cy = height / 2.0

    # Crude approximation: use the image width as the focal length in pixels.
    # A calibrated camera matrix would give more accurate results.
    focal_length = width
    camera_matrix = np.array([[focal_length, 0,            cx],
                              [0,            focal_length, cy],
                              [0,            0,            1]], dtype=np.float32)
    dist_coeffs = np.zeros((4, 1), dtype=np.float32)

    # 3D model points for the QR code corners (a unit square centered at (0, 0))
    object_points = np.array([
        [-0.5, -0.5, 0],
        [ 0.5, -0.5, 0],
        [ 0.5,  0.5, 0],
        [-0.5,  0.5, 0]
    ], dtype=np.float32)

    # Solve for the pose (rvec, tvec)
    # points[0] has shape (4, 2): the four detected corners
    retval, rvec, tvec = cv2.solvePnP(object_points, points[0], camera_matrix, dist_coeffs)

    # Define a small axis gizmo in 3D to project (origin plus X, Y and Z tips)
    axis_3d_points = np.float32([
        [0,   0,    0],
        [0.5, 0,    0],
        [0,   0.5,  0],
        [0,   0,   -0.5]
    ])

    # Project the 3D axis points onto the 2D image
    imgpts, _ = cv2.projectPoints(axis_3d_points, rvec, tvec, camera_matrix, dist_coeffs)
    center = tuple(imgpts[0].ravel().astype(int))
    x_axis = tuple(imgpts[1].ravel().astype(int))
    y_axis = tuple(imgpts[2].ravel().astype(int))
    z_axis = tuple(imgpts[3].ravel().astype(int))

    # Draw the axes: red (X), green (Y), blue (Z)
    cv2.line(imgInput, center, x_axis, (0, 0, 255), 5)
    cv2.line(imgInput, center, y_axis, (0, 255, 0), 5)
    cv2.line(imgInput, center, z_axis, (255, 0, 0), 5)

    return imgInput
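

# Note on units: object_points in reconstruct_pose_from_frame() describe a QR
# code with side length 1, so the translation vector (tvec) returned by
# cv2.solvePnP is expressed in multiples of the code's physical side length.
# Scaling tvec by the real side length (e.g. 50 mm) would give a metric
# camera-to-code translation, complementing qr_code_distance_estimation() below.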


def find_qr_code(imgInput):
    global opencv_qr_detector

    # First attempt: detect with zbar
    points, qr_code_data = get_qr_code_corner_points(imgInput)
    ret_qr = len(points) > 0
    if ret_qr:
        print("\tQR code detected (zbar)!")
        points = np.array(points)
        cv2.polylines(imgInput, [points.astype(int)], isClosed=True, color=(64, 128, 255), thickness=5)
        imgInput = reconstruct_pose_from_frame(imgInput, points.astype(np.float32))
        imgInput = qr_code_distance_estimation(imgInput, points.astype(np.float32))

    # Second attempt: detect with OpenCV's built-in QR code detector
    ret_qr, points = opencv_qr_detector.detect(imgInput)
    if ret_qr:
        print("\tQR code detected (opencv)!")
        cv2.polylines(imgInput, [points[0].astype(int)], isClosed=True, color=(128, 255, 0), thickness=5)
        qr_code_data, _ = opencv_qr_detector.decode(imgInput, points)
        imgInput = reconstruct_pose_from_frame(imgInput, points.astype(np.float32))
        imgInput = qr_code_distance_estimation(imgInput, points.astype(np.float32))

    # Overlay the decoded data (if any) at the bottom of the frame
    if qr_code_data:
        height, width = imgInput.shape[:2]
        y = height - 10
        s = "Data: " + qr_code_data
        cv2.putText(imgInput, s, (10, y), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 6)
        cv2.putText(imgInput, s, (10, y), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    return imgInput


def qr_code_distance_estimation(imgInput, corners, qr_size_mm=50.):
    distance_mm = None
    if len(corners) == 1:
        corners = corners[0]
    assert len(corners) == 4

    # Calculate the perimeter of the QR code in pixels
    perimeter_px = 0
    for i in range(4):
        p1 = corners[i]
        p2 = corners[(i + 1) % 4]
        perimeter_px += np.sqrt((p2[0] - p1[0])**2 + (p2[1] - p1[1])**2)

    # Average side length in pixels
    side_length_px = perimeter_px / 4

    # Calculate the focal length (you would typically calibrate this).
    # For simplicity, we derive it from the camera's field of view, assuming a
    # 60-degree horizontal field of view as for a typical webcam.
    image_width = imgInput.shape[1]
    focal_length_px = image_width / (2 * math.tan(math.radians(60) / 2))

    # Calculate the distance using similar triangles
    distance_mm = (qr_size_mm * focal_length_px) / side_length_px

    distance_text = f"Distance: {distance_mm:.1f} mm ({distance_mm/1000:.2f} m)"
    cv2.putText(imgInput, distance_text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 6)
    cv2.putText(imgInput, distance_text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

    return imgInput
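

# Worked example of the similar-triangles estimate above (numbers are
# illustrative): with a 640-pixel-wide image and a 60-degree horizontal FOV,
# focal_length_px = 640 / (2 * tan(30°)) ≈ 554 px. A 50 mm QR code whose
# average side spans 100 px is then estimated at (50 * 554) / 100 ≈ 277 mm
# from the camera.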


app = Flask(__name__, template_folder='/home/pi/adeept_darkpaw/server')
picam2 = None
opencv_qr_detector = cv2.QRCodeDetector()


@app.before_first_request
def initialize():
    global picam2

    picam2 = Picamera2()

    print("Available sensor modes (resolutions):")
    # Each mode is a dict with a "size" key among others.
    for mode in picam2.sensor_modes:
        print("\t" + str(mode.get("size")))

    picam2.configure(picam2.create_still_configuration(main={"size": (640, 360)},
                                                       transform=libcamera.Transform(vflip=True)))
    time.sleep(1)
    picam2.start()
    time.sleep(1)


def gen_frames():
    global picam2
    while True:
        print("Capturing frame...")
        frame = picam2.capture_array()
        print(frame.dtype)
        print(frame.shape)

        # frame = cv2.flip(frame, 0)   # flip up-down
        frame = find_qr_code(frame)
        frame = frame[..., [2, 1, 0]]  # swap the B and R channels before JPEG encoding
        ret, buffer = cv2.imencode('.jpg', frame)
        frame = buffer.tobytes()

        # Concatenate the frame with HTTP multipart headers
        yield (b'--frame\r\n'
               b'Content-Type: image/jpeg\r\n\r\n' + frame + b'\r\n')


@app.route('/')
def index():
    # Render an HTML template that displays the video stream
    return render_template('index.html')


@app.route('/video_feed')
def video_feed():
    # Return the generated frames with the multipart MIME type used for MJPEG streaming
    return Response(gen_frames(), mimetype='multipart/x-mixed-replace; boundary=frame')


if __name__ == '__main__':
    # Run the app on all interfaces so it is accessible from your network
    app.run(host='0.0.0.0', port=5001, debug=True)
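

# --- index.html ---
# Save the following HTML template as "index.html" in the Flask template_folder
# configured above (i.e. /home/pi/adeept_darkpaw/server/index.html).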
<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <title>Picamera2 Stream</title>
</head>
<body>
    <h1>Live Camera Feed</h1>
    <img src="{{ url_for('video_feed') }}" alt="Camera Feed">
</body>
</html>