Founding depth on bounding box

ergunerrdogan · October 11, 2024, 7:58am

Dear Relevant, I have an algorithm that tracks objects with Orbbec Gemini 335. I want to calculate the depth of the object I find and print it as x y z. However, I encounter an error every time. It either measures the depth incorrectly or it cannot. You will see the sample code below.


def rendering_frames():
    global color_frames_queue, depth_frames_queue  # Queues for color and depth frames
    global curr_device_cnt  # Current count of devices
    global stop_rendering  # Flag to stop rendering
    temporal_filter = TemporalFilter(alpha=0.5) # temporal filter init for depth 
    # Başlangıç pozisyonları
    window_x, window_y = 0, 0  # Birinci kamera pozisyonları
    window_x2, window_y2 = 0, 1200  # İkinci kamera pozisyonları

    window_width, window_height = 430, 300  # Pencere boyutları

    while not stop_rendering:
        # Loop through each device
        for i in range(curr_device_cnt):
            # Initialize color and depth frames
            color_frame = None
            depth_frame = None

            # Get the latest color frame from the queue
            if not color_frames_queue[i].empty():
                color_frame = color_frames_queue[i].get()

            # Get the latest depth frame from the queue
            if not depth_frames_queue[i].empty():
                depth_frame = depth_frames_queue[i].get()

            # If both frames are None, skip to the next device
            if color_frame is None and depth_frame is None:
                continue

            # Process the color frame
            if color_frame is not None:
                # Convert the frame to BGR format for OpenCV
                color_image = frame_to_bgr_image(color_frame)
                # Resize the image for YOLOv8 (expected input size)
                color_image = cv2.resize(color_image, (640, 640))

                # Run  detection on the color image
                results = model(color_image)

                segmented_image = segment_and_filter(color_image)

                window_name_segmented = f"Device {i} Segmented"

                if i == 0:  # Birinci kamera
                    cv2.imshow(window_name_segmented, cv2.resize(segmented_image, (window_width, window_height)))
                    cv2.moveWindow(window_name_segmented, window_x + 503, window_y)
                elif i == 1:  # İkinci kamera
                    cv2.imshow(window_name_segmented, cv2.resize(segmented_image, (window_width, window_height)))
                    cv2.moveWindow(window_name_segmented, window_x2 + 503, window_y2)

                # Loop through detections
                for result in results[0].boxes:
                    # Extract bounding box coordinates
                    x1, y1, x2, y2 = result.xyxy[0]  # Bounding box corners
                    class_id = result.cls  # Class ID
                    conf = result.conf  # Confidence score

                    # Only process the '' class (assuming its class ID is 0)
                    if class_id == 0 and conf >= 0.70:  
                        # Calculate the center of the bounding box
                        center_x = int((x1 + x2) / 2)
                        center_y = int((y1 + y2) / 2)
                        if i == 0:
                            region_num = (center_x // (color_image.shape[1] // 7)) + 8  # 1 to 7
                        else:
                            region_num = (center_x // (color_image.shape[1] // 7)) + 1  # 8 to 14

                        # Check if depth frame is available and the coordinates are valid
                        if depth_frame is not None:
                            width = depth_frame.get_width()
                            height = depth_frame.get_height()
                            scale = depth_frame.get_depth_scale()

                            # Ensure the center_x and center_y are within the depth frame's dimensions
                            if 0 <= center_x < width and 0 <= center_y < height:
                                # Get depth data as a 2D array
                                depth_data = np.frombuffer(depth_frame.get_data(), dtype=np.uint16)
                                depth_data = depth_data.reshape((height, width))
                                depth_data = depth_data.astype(np.float32) * scale

                                depth_data = temporal_filter.process(depth_data)

                                # Get depth value at the center of the bounding box
                                center_yy = int(height / 2) 
                                center_xx = int(width / 2)
                                z = depth_data[center_yy, center_xx]
                                z = round(z) - 180
                                
                                # Print the region and depth information
                                if center_y >= 400 and is_segmented(segmented_image, center_x, center_y):
                                    print(f" detected at region: {region_num}, y: {region_num}, z: {z}")
                                    if 165 <= z <= 600:
                                        command = f"x:{region_num} y:10 z:{z}\r"
                                    else:
                                        command = f"x:{region_num} y:10 z:200\r"
                            else:
                                if center_y >= 400 and is_segmented(segmented_image, center_x, center_y):
                                    # Print with a default z value when out of bounds
                                    print(f"Bounding box center is out of depth frame bounds (x: {region_num}, y: {center_y}, z: 200)")
                                    command = f"x:{region_num} y:10 z:200\r"
                        else:
                            if center_y >= 400 and is_segmented(segmented_image, center_x, center_y):
                                # Print with a default z value when depth frame is not available
                                print(f"Depth frame is not available for device {i} (x: {region_num}, y: {center_y}, z: 200)")
                                command = f"x:{region_num} y:10 z:200\r"

                # Annotate the image with detection results for high confidence detections
                results[0].boxes = [box for box in results[0].boxes if box.conf >= 0.70]
                
                # Annotate the image
                annotated_image = results[0].plot()
                
                # Draw regions for the current camera
                draw_regions(annotated_image, i)

                # Display the annotated image in a window
                window_name_yolo = f"Device {i} process"

                if i == 0: 
                    cv2.imshow(window_name_yolo, cv2.resize(annotated_image, (window_width, window_height)))
                    cv2.moveWindow(window_name_yolo, window_x, window_y)
                elif i == 1:  
                    cv2.imshow(window_name_yolo, cv2.resize(annotated_image, (window_width, window_height)))
                    cv2.moveWindow(window_name_yolo, window_x2, window_y2)

            # Process the depth frame (optional)
            if depth_frame is not None:
                width = depth_frame.get_width()
                height = depth_frame.get_height()
                scale = depth_frame.get_depth_scale()

                depth_data = np.frombuffer(depth_frame.get_data(), dtype=np.uint16)
                depth_data = depth_data.reshape((height, width))

                depth_data = depth_data.astype(np.float32) * scale 
                #depth_data = temporal_filter.process(depth_data)  # Apply temporal filtering

                depth_image = cv2.normalize(depth_data, None, 0, 10000, cv2.NORM_MINMAX, dtype=cv2.CV_8U)
                depth_image = cv2.applyColorMap(depth_image, cv2.COLORMAP_JET)

                window_name_depth = f"Device {i} Depth"

                if i == 0:  # Birinci kamera için derinlik görüntüsü
                    cv2.imshow(window_name_depth, cv2.resize(depth_image, (window_width, window_height)))
                    cv2.moveWindow(window_name_depth, window_x + 940, window_y)
                elif i == 1:  # İkinci kamera için derinlik görüntüsü
                    cv2.imshow(window_name_depth, cv2.resize(depth_image, (window_width, window_height)))
                    cv2.moveWindow(window_name_depth, window_x2 + 940, window_y2)

            # Check for exit key
            key = cv2.waitKey(1)  # Wait for a short period
            if key == ord('q'):  # Quit on 'q' key press
                return

ergunerrdogan · October 11, 2024, 8:00am

by the way ı tried
                                z = depth_data[center_y, center_x]