Copilot + RealSense Follow Me ROS
import pyrealsense2 as rs
import numpy as np
import cv2
import json  # For serializing ROS 2 twist commands

# Load YOLOv3 model
net = cv2.dnn.readNet("yolov3.weights", "yolov3.cfg")
layer_names = net.getLayerNames()
output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]

# Initialize RealSense camera (color + depth, 640x480 @ 30 fps)
pipeline = rs.pipeline()
config = rs.config()
config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 30)
config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 30)
pipeline.start(config)

try:
    while True:
        # Wait for a coherent pair of frames: depth and color
        frames = pipeline.wait_for_frames()
        color_frame = frames.get_color_frame()
        depth_frame = frames.get_depth_frame()
        if not color_frame or not depth_frame:
            continue

        # Convert images to numpy arrays
        color_image = np.asanyarray(color_frame.get_data())
        depth_image = np.asanyarray(depth_frame.get_data())

        # Detect objects
        blob = cv2.dnn.blobFromImage(color_image, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
        net.setInput(blob)
        outs = net.forward(output_layers)

        # Analyze detections
        class_ids = []
        confidences = []
        boxes = []
        for out in outs:
            for detection in out:
                scores = detection[5:]
                class_id = np.argmax(scores)
                confidence = scores[class_id]
                if confidence > 0.5 and class_id == 0:  # class_id == 0 is for 'person'
                    center_x = int(detection[0] * 640)
                    center_y = int(detection[1] * 480)
                    w = int(detection[2] * 640)
                    h = int(detection[3] * 480)
                    x = int(center_x - w / 2)
                    y = int(center_y - h / 2)
                    boxes.append([x, y, w, h])
                    confidences.append(float(confidence))
                    class_ids.append(class_id)

        # Non-maximum suppression; flatten since NMSBoxes may return an (N, 1) array,
        # and skip the call entirely when there are no candidate boxes
        indexes = np.array(cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)).flatten() if boxes else []

        # Draw bounding boxes and emit a twist command per retained detection
        for i in indexes:
            x, y, w, h = boxes[i]
            label = "Person"
            # Clamp the box center inside the depth image before indexing
            img_h, img_w = depth_image.shape
            center_x = min(max(x + w // 2, 0), img_w - 1)
            center_y = min(max(y + h // 2, 0), img_h - 1)
            distance = depth_image[center_y, center_x] * depth_frame.get_units()
            cv2.rectangle(color_image, (x, y), (x + w, y + h), (0, 255, 0), 2)
            cv2.putText(color_image, f"{label} {distance:.2f}m", (x, y - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

            # Print ROS 2 twist JSON string based on distance:
            # stop within 1 m of the person, otherwise drive forward at 0.3 m/s
            if distance < 1.0:
                twist = {
                    "linear": {"x": 0.0, "y": 0.0, "z": 0.0},
                    "angular": {"x": 0.0, "y": 0.0, "z": 0.0}
                }
            else:
                twist = {
                    "linear": {"x": 0.3, "y": 0.0, "z": 0.0},
                    "angular": {"x": 0.0, "y": 0.0, "z": 0.0}
                }
            print(json.dumps(twist))

        # Display the resulting frame; press 'q' to quit
        cv2.imshow('RealSense', color_image)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Stop streaming
    pipeline.stop()
    cv2.destroyAllWindows()
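
Note that yolov3.weights and yolov3.cfg are not included in the gist; they come from the Darknet project and are commonly downloaded from pjreddie.com and the pjreddie/darknet GitHub repository.

The script only prints each twist command as a JSON line on stdout; nothing here actually publishes to ROS 2. Below is a minimal sketch of a bridge node that could consume that output, assuming rclpy and geometry_msgs are available. The node name, the /cmd_vel topic, and the TwistBridge class are illustrative choices, not part of the original gist.

import sys
import json

import rclpy
from rclpy.node import Node
from geometry_msgs.msg import Twist


class TwistBridge(Node):
    """Reads twist JSON lines from stdin and republishes them on /cmd_vel."""

    def __init__(self):
        super().__init__('follow_me_twist_bridge')
        self.publisher = self.create_publisher(Twist, '/cmd_vel', 10)

    def relay(self, line):
        data = json.loads(line)
        msg = Twist()
        # The detector only varies linear.x; the other fields default to 0.0
        msg.linear.x = data["linear"]["x"]
        msg.angular.z = data["angular"]["z"]
        self.publisher.publish(msg)


def main():
    rclpy.init()
    node = TwistBridge()
    try:
        for line in sys.stdin:  # one JSON twist per line from the detector
            node.relay(line)
    finally:
        node.destroy_node()
        rclpy.shutdown()


if __name__ == '__main__':
    main()

With the detector saved as, say, follow_me.py and this bridge as twist_bridge.py (file names hypothetical), the two could be chained as: python follow_me.py | python twist_bridge.py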