Quantcast
Channel: Raspberry Pi Forums
Viewing all articles
Browse latest Browse all 6814

AI Camera - IMX500 • How to Make Your Own YOLOv8 Model Work with the Raspberry Pi AI Camera

$
0
0
The code below is for performing deep learning inference using the standard Raspberry Pi Camera Module, not the AI Camera.

Code:

"""Capture one still with the standard Pi camera, run YOLO, and report colours.

The script grabs a single frame, whites out the regions that are not of
interest, runs the trained YOLO model, draws every detection on the image and
prints the colour codes of the accepted detections ordered right-to-left.
"""
import time

import cv2
from picamera2 import Picamera2
from ultralytics import YOLO

# Load the trained model.
model = YOLO("/home/meiden/WRO/train26/weights/best.pt")
class_names = model.names

# Mapping from colour label to the digit that is sent.
color_map = {"green": 0, "red": 1, "yellow": 2, "blue": 3}

# Initialise the camera.
picam2 = Picamera2()
try:
    picam2.configure(picam2.create_still_configuration(main={"size": (1477, 1108)}))
    picam2.start()
    time.sleep(1)  # give the camera a moment after start before capturing

    print("Capturing image and running inference...")

    # Capture a frame.
    img_rgb = picam2.capture_array()
finally:
    # Only one frame is needed, so release the camera whether or not the
    # capture succeeded (the original never stopped or closed it).
    picam2.stop()
    picam2.close()

img_bgr = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2BGR)

# Paint the unneeded regions white so they cannot produce detections.
width = img_bgr.shape[1]
img_bgr[0:800, :] = (255, 255, 255)
img_bgr[:, 1350:] = (255, 255, 255)
img_bgr[:, :700] = (255, 255, 255)
img_bgr[0:900, width - 283:width] = (255, 255, 255)

# Inference.
results = model(img_bgr)

det = []
for r in results:
    for b in r.boxes:
        x1, y1, x2, y2 = map(int, b.xyxy[0])
        cls_id = int(b.cls[0])
        conf = float(b.conf[0])
        cls_name = class_names[cls_id]
        cx = (x1 + x2) // 2
        cy = (y1 + y2) // 2

        # Visualisation: box, centre point, and an outlined text label
        # (thick white pass first, thin black pass on top).
        cv2.rectangle(img_bgr, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.circle(img_bgr, (cx, cy), 10, (0, 0, 255), -1)
        label = f"{cls_name} {conf:.2f}"
        cv2.putText(img_bgr, label, (x1, y1 - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)
        cv2.putText(img_bgr, label, (x1, y1 - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1)

        # Every box is drawn, but only known colours above the confidence
        # threshold are reported.
        if cls_name in color_map and conf > 0.40:
            det.append({"cls": cls_name, "conf": conf, "cx": cx})

# Order the detections from right to left (largest centre x first).
if det:
    det.sort(key=lambda o: o["cx"], reverse=True)
    send = "".join(str(color_map[o["cls"]]) for o in det)
else:
    # Fallback value used when nothing was detected.
    send = "00123"

print(f"Detected: {send}")

# Save the annotated image.
cv2.imwrite("/home/meiden/WRO/output.jpg", img_bgr)
print("Saved: /home/meiden/WRO/output.jpg")

# Show the annotated image until a key is pressed.
cv2.imshow("Detection Result", img_bgr)
cv2.waitKey(0)
cv2.destroyAllWindows()
I modified this code for the AI Camera as shown below, but it kept throwing errors and didn't work. If there's a fundamental error in this code, please point it out.

Code:

#!/usr/bin/env python3
"""IMX500 (Raspberry Pi AI Camera) classification demo for a custom label set.

The network file given with ``--model`` is loaded onto the IMX500, and for each
frame the top classification results are drawn onto the ISP output.

NOTE(review): this script only handles networks whose intrinsics task is
"classification". A network exported from an object-detection model (e.g. a
YOLOv8 detector) presumably reports a different task and/or a different output
tensor layout and will not work here - confirm against the network's
intrinsics (``--print-intrinsics``).
"""
import argparse
import sys
import time
from typing import List

import cv2
import numpy as np

from picamera2 import CompletedRequest, MappedArray, Picamera2
from picamera2.devices import IMX500
from picamera2.devices.imx500 import NetworkIntrinsics
from picamera2.devices.imx500.postprocess import softmax

last_detections = []
LABELS = None
intrinsics = None
imx500 = None


class Classification:
    def __init__(self, idx: int, score: float):
        """Create a Classification object, recording the idx and score."""
        self.idx = idx
        self.score = score


def _load_labels(request: CompletedRequest) -> List[str]:
    """Build the label list, adapting it to the model's output tensor size."""
    has_labels = intrinsics is not None and intrinsics.labels is not None
    # Copy so that inserting a placeholder never mutates intrinsics.labels.
    labels = list(intrinsics.labels) if has_labels else []

    try:
        output_shapes = imx500.get_output_shapes(request.get_metadata())
        output_tensor_size = output_shapes[0][0] if output_shapes else None
    except Exception as exc:
        # Best effort: keep going with the labels as given, but say why the
        # size check was skipped instead of swallowing the error silently.
        print(f"Warning: could not determine model output size: {exc}", file=sys.stderr)
        output_tensor_size = None

    if output_tensor_size is None:
        return labels

    if output_tensor_size == len(labels) + 1:
        # If model has one extra class (e.g. background at index 0), insert placeholder
        return ["<background>"] + labels
    if output_tensor_size != len(labels):
        # Warn but continue; index access is guarded in get_label
        print(f"Warning: model output size ({output_tensor_size}) != labels length ({len(labels)})",
              file=sys.stderr)
    return labels


def get_label(request: CompletedRequest, idx: int) -> str:
    """Retrieve the label corresponding to the classification index.

    The label list is built once, on the first call, and cached in the
    module-level ``LABELS``. An index outside the label list yields the
    placeholder ``<label_{idx}>`` instead of raising.
    """
    global LABELS
    if LABELS is None:
        LABELS = _load_labels(request)

    # Safety: return placeholder if idx out of range
    if not 0 <= idx < len(LABELS):
        return f"<label_{idx}>"
    return LABELS[idx]


def parse_and_draw_classification_results(request: CompletedRequest):
    """Analyse and draw the classification results in the output tensor."""
    try:
        results = parse_classification_results(request)
        draw_classification_results(request, results)
    except Exception as e:
        # Do not crash the main loop on unexpected error in callback
        print(f"Error in pre_callback: {e}", file=sys.stderr)


def parse_classification_results(request: CompletedRequest) -> List[Classification]:
    """Parse the output tensor into the top (at most three) classifications.

    When the request carries no output tensor, the results of the previous
    call are returned unchanged.
    """
    global last_detections
    np_outputs = imx500.get_outputs(request.get_metadata())
    if np_outputs is None:
        return last_detections

    np_output = np_outputs[0].flatten()
    if intrinsics and getattr(intrinsics, "softmax", False):
        np_output = softmax(np_output)

    # Never ask for more results than the model has classes.
    top_k = min(3, len(np_output))
    top_indices = np.argsort(-np_output)[:top_k]
    last_detections = [Classification(int(index), float(np_output[int(index)]))
                       for index in top_indices]
    return last_detections


def draw_classification_results(request: CompletedRequest, results: List[Classification], stream: str = "main"):
    """Draw the classification results for this request onto the ISP output."""
    with MappedArray(request, stream) as m:
        if intrinsics and intrinsics.preserve_aspect_ratio:
            # Drawing ROI box
            b_x, b_y, b_w, b_h = imx500.get_roi_scaled(request)
            color = (255, 0, 0)  # red
            cv2.putText(m.array, "ROI", (b_x + 5, b_y + 15), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)
            cv2.rectangle(m.array, (b_x, b_y), (b_x + b_w, b_y + b_h), (255, 0, 0, 0))
            text_left, text_top = b_x, b_y + 20
        else:
            text_left, text_top = 0, 0

        # Drawing labels (in the ROI box if it exists)
        for index, result in enumerate(results):
            label = get_label(request, idx=result.idx)
            text = f"{label}: {result.score:.3f}"

            # Calculate text size and position
            (text_width, text_height), baseline = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
            text_x = text_left + 5
            text_y = text_top + 15 + index * 20

            # Create a copy of the array to draw the background with opacity
            overlay = m.array.copy()

            # Draw the background rectangle on the overlay
            cv2.rectangle(overlay,
                          (text_x, text_y - text_height),
                          (text_x + text_width, text_y + baseline),
                          (255, 255, 255),
                          cv2.FILLED)

            alpha = 0.3
            cv2.addWeighted(overlay, alpha, m.array, 1 - alpha, 0, m.array)

            # Draw text on top of the background
            cv2.putText(m.array, text, (text_x, text_y),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1)


def get_args():
    """Parse command line arguments."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", type=str, help="Path of the model",
                        default="/usr/share/imx500-models/imx500_network_mobilenet_v2.rpk")
    parser.add_argument("--fps", type=int, help="Frames per second")
    parser.add_argument("-s", "--softmax", action=argparse.BooleanOptionalAction, help="Add post-process softmax")
    parser.add_argument("-r", "--preserve-aspect-ratio", action=argparse.BooleanOptionalAction,
                        help="preprocess the image with preserve aspect ratio")
    parser.add_argument("--labels", type=str,
                        help="Path to the labels file")
    parser.add_argument("--print-intrinsics", action="store_true",
                        help="Print JSON network_intrinsics then exit")
    return parser.parse_args()


if __name__ == "__main__":
    args = get_args()

    # This must be called before instantiation of Picamera2
    imx500 = IMX500(args.model)
    intrinsics = imx500.network_intrinsics
    if not intrinsics:
        intrinsics = NetworkIntrinsics()
        intrinsics.task = "classification"
    elif intrinsics.task != "classification":
        print("Network is not a classification task", file=sys.stderr)
        # Report what the network declares so a mismatched model is obvious.
        print(f"Detected task: {intrinsics.task!r}", file=sys.stderr)
        sys.exit(1)

    # Override intrinsics from args
    for key, value in vars(args).items():
        if key == 'labels' and value is not None:
            # One label per line; the line index is the class index.
            with open(value, 'r', encoding="utf-8") as f:
                intrinsics.labels = f.read().splitlines()
        elif hasattr(intrinsics, key) and value is not None:
            setattr(intrinsics, key, value)

    # Defaults
    if intrinsics.labels is None:
        with open("assets/imagenet_labels.txt", "r", encoding="utf-8") as f:
            intrinsics.labels = f.read().splitlines()
    intrinsics.update_with_defaults()

    if args.print_intrinsics:
        # Print intrinsics JSON and exit
        print(intrinsics)
        sys.exit(0)

    picam2 = Picamera2(imx500.camera_num)
    try:
        config = picam2.create_preview_configuration(controls={"FrameRate": intrinsics.inference_rate},
                                                     buffer_count=12)

        imx500.show_network_fw_progress_bar()
        picam2.start(config, show_preview=False)
        if intrinsics.preserve_aspect_ratio:
            imx500.set_auto_aspect_ratio()

        # Register the callback to parse and draw classification results
        picam2.pre_callback = parse_and_draw_classification_results

        # Main loop: all work happens in the callback, so just keep the
        # process alive until interrupted.
        while True:
            time.sleep(0.5)
    except KeyboardInterrupt:
        print("Exiting...")
    finally:
        # Release the camera on every exit path, not only on Ctrl+C.
        picam2.close()
The current program is located in the file /home/meiden/picamera2/examples/imx500/imx500_classification_custom_demo.py.

Additionally, the model is located in the directory /home/meiden/picamera2/examples/imx500/best_imx_model.

The trained labels are stored in the file
/home/meiden/picamera2/examples/imx500/assets/marker_labels.txt
in the format blue red green yellow.

Currently, I enter the directory using `cd ~/picamera2/examples/imx500` in the terminal and start the program with this command:
python3 imx500_classification_custom_demo.py --model best_imx_model/network.rpk --labels assets/marker_labels.txt --softmax

Please let me know if you notice any issues. Thank you.

Statistics: Posted by ikengo — Mon Sep 08, 2025 10:29 am — Replies 0 — Views 186



Viewing all articles
Browse latest Browse all 6814

Trending Articles