The code below is for performing deep learning inference using the standard Raspberry Pi Camera Module, not the AI Camera. I modified this code for the AI Camera as shown in the second listing, but it kept throwing errors and didn't work. If there is a fundamental error in this code, please point it out.
The current program is located in the file /home/meiden/picamera2/examples/imx500/imx500_classification_custom_demo.py.
Additionally, the model is located in /home/meiden/picamera2/examples/imx500/best_imx_model.
The trained labels are stored in the file /home/meiden/picamera2/examples/imx500/assets/marker_labels.txt and contain the classes blue, red, green, yellow (a sample of the file is shown below).
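For reference, here is what I expect marker_labels.txt to contain: one label per line, since the script loads it with f.read().splitlines().
Code:
blue
red
green
yellow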
Currently, I enter the directory with `cd ~/picamera2/examples/imx500` in the terminal and start the program with this command:
python3 imx500_classification_custom_demo.py --model best_imx_model/network.rpk --labels assets/marker_labels.txt --softmax
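As a sanity check, the script's own --print-intrinsics flag (defined in get_args() in the second listing) should print the resolved network intrinsics and exit before the camera is started:
Code:
python3 imx500_classification_custom_demo.py --model best_imx_model/network.rpk --print-intrinsics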
Code:
import cv2
from ultralytics import YOLO
from picamera2 import Picamera2
import time

# Load the model
model = YOLO("/home/meiden/WRO/train26/weights/best.pt")
class_names = model.names

# Initialise the camera
picam2 = Picamera2()
picam2.configure(picam2.create_still_configuration(main={"size": (1477, 1108)}))
picam2.start()
time.sleep(1)

# Colour label mapping
color_map = {"green": 0, "red": 1, "yellow": 2, "blue": 3}

print("Capturing image and running inference...")

# Capture
img_rgb = picam2.capture_array()
img_bgr = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2BGR)

# Mask out unneeded regions with white
height, width, _ = img_bgr.shape
img_bgr[0:800, :] = (255, 255, 255)
img_bgr[:, 1350:] = (255, 255, 255)
img_bgr[:, :700] = (255, 255, 255)
img_bgr[0:900, width - 283:width] = (255, 255, 255)

# Inference
results = model(img_bgr)
det = []
for r in results:
    for b in r.boxes:
        x1, y1, x2, y2 = map(int, b.xyxy[0])
        cls_id = int(b.cls[0])
        conf = float(b.conf[0])
        cls_name = class_names[cls_id]
        cx = (x1 + x2) // 2
        cy = (y1 + y2) // 2

        # Visualisation
        cv2.rectangle(img_bgr, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.circle(img_bgr, (cx, cy), 10, (0, 0, 255), -1)
        label = f"{cls_name} {conf:.2f}"
        cv2.putText(img_bgr, label, (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)
        cv2.putText(img_bgr, label, (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1)

        if cls_name in color_map and conf > 0.40:
            det.append({"cls": cls_name, "conf": conf, "cx": cx})

# Sort detections from right to left
if det:
    det.sort(key=lambda o: o["cx"], reverse=True)
    codes = [str(color_map[o["cls"]]) for o in det]
    send = "".join(codes)
else:
    send = "00123"

print(f"Detected: {send}")

# Save the image
cv2.imwrite("/home/meiden/WRO/output.jpg", img_bgr)
print("Saved: /home/meiden/WRO/output.jpg")

# Show the image
cv2.imshow("Detection Result", img_bgr)
cv2.waitKey(0)
cv2.destroyAllWindows()
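To make the intended post-processing easier to follow, here is the right-to-left sort and digit-encoding step from the script above as a self-contained sketch (the detections are made-up values, not real YOLO output):
Code:
# Sketch of the sorting/encoding step, with hypothetical detections
# standing in for real YOLO results.
color_map = {"green": 0, "red": 1, "yellow": 2, "blue": 3}

det = [  # hypothetical detections: class name, confidence, box centre x
    {"cls": "red", "conf": 0.91, "cx": 120},
    {"cls": "blue", "conf": 0.88, "cx": 940},
    {"cls": "green", "conf": 0.73, "cx": 510},
]

# Sort right to left (largest centre x first), then map each class to its digit
det.sort(key=lambda o: o["cx"], reverse=True)
send = "".join(str(color_map[o["cls"]]) for o in det)
print(send)  # -> "301" (blue, green, red)

And here is my modified version for the AI Camera:
Code: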
#!/usr/bin/env python3
import argparse
import sys
import time
from typing import List

import cv2
import numpy as np

from picamera2 import CompletedRequest, MappedArray, Picamera2
from picamera2.devices import IMX500
from picamera2.devices.imx500 import NetworkIntrinsics
from picamera2.devices.imx500.postprocess import softmax

last_detections = []
LABELS = None
intrinsics = None
imx500 = None


class Classification:
    def __init__(self, idx: int, score: float):
        """Create a Classification object, recording the idx and score."""
        self.idx = idx
        self.score = score


def get_label(request: CompletedRequest, idx: int) -> str:
    """Retrieve the label corresponding to the classification index, robust for arbitrary label counts."""
    global LABELS, intrinsics, imx500
    if LABELS is None:
        # Use intrinsics labels if present
        LABELS = intrinsics.labels if (intrinsics is not None and intrinsics.labels is not None) else []
        # Try to detect model output tensor size and adapt if needed
        output_tensor_size = None
        try:
            output_shapes = imx500.get_output_shapes(request.get_metadata())
            if output_shapes:
                output_tensor_size = output_shapes[0][0]
        except Exception:
            output_tensor_size = None
        if output_tensor_size is not None:
            if output_tensor_size == len(LABELS) + 1:
                # If model has one extra class (e.g. background at index 0), insert placeholder
                LABELS = ["<background>"] + LABELS
            elif output_tensor_size != len(LABELS):
                # Warn but continue; we'll guard index access later
                print(f"Warning: model output size ({output_tensor_size}) != labels length ({len(LABELS)})",
                      file=sys.stderr)
    # Safety: return placeholder if idx out of range
    if LABELS is None or idx < 0 or idx >= len(LABELS):
        return f"<label_{idx}>"
    return LABELS[idx]


def parse_and_draw_classification_results(request: CompletedRequest):
    """Analyse and draw the classification results in the output tensor."""
    try:
        results = parse_classification_results(request)
        draw_classification_results(request, results)
    except Exception as e:
        # Do not crash the main loop on unexpected error in callback
        print(f"Error in pre_callback: {e}", file=sys.stderr)


def parse_classification_results(request: CompletedRequest) -> List[Classification]:
    """Parse the output tensor into the classification results above the threshold."""
    global last_detections, intrinsics, imx500
    np_outputs = imx500.get_outputs(request.get_metadata())
    if np_outputs is None:
        return last_detections
    np_output = np_outputs[0]
    np_output = np_output.flatten()
    if intrinsics and getattr(intrinsics, "softmax", False):
        np_output = softmax(np_output)
    num_classes = len(np_output)
    top_k = min(3, num_classes)
    top_indices = np.argsort(-np_output)[:top_k]
    last_detections = [Classification(int(index), float(np_output[int(index)])) for index in top_indices]
    return last_detections


def draw_classification_results(request: CompletedRequest, results: List[Classification], stream: str = "main"):
    """Draw the classification results for this request onto the ISP output."""
    with MappedArray(request, stream) as m:
        if intrinsics and intrinsics.preserve_aspect_ratio:
            # Drawing ROI box
            b_x, b_y, b_w, b_h = imx500.get_roi_scaled(request)
            color = (255, 0, 0)  # red
            cv2.putText(m.array, "ROI", (b_x + 5, b_y + 15), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)
            cv2.rectangle(m.array, (b_x, b_y), (b_x + b_w, b_y + b_h), (255, 0, 0, 0))
            text_left, text_top = b_x, b_y + 20
        else:
            text_left, text_top = 0, 0
        # Drawing labels (in the ROI box if it exists)
        for index, result in enumerate(results):
            label = get_label(request, idx=result.idx)
            text = f"{label}: {result.score:.3f}"
            # Calculate text size and position
            (text_width, text_height), baseline = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
            text_x = text_left + 5
            text_y = text_top + 15 + index * 20
            # Create a copy of the array to draw the background with opacity
            overlay = m.array.copy()
            # Draw the background rectangle on the overlay
            cv2.rectangle(overlay, (text_x, text_y - text_height), (text_x + text_width, text_y + baseline),
                          (255, 255, 255), cv2.FILLED)
            alpha = 0.3
            cv2.addWeighted(overlay, alpha, m.array, 1 - alpha, 0, m.array)
            # Draw text on top of the background
            cv2.putText(m.array, text, (text_x, text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1)


def get_args():
    """Parse command line arguments."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", type=str, help="Path of the model",
                        default="/usr/share/imx500-models/imx500_network_mobilenet_v2.rpk")
    parser.add_argument("--fps", type=int, help="Frames per second")
    parser.add_argument("-s", "--softmax", action=argparse.BooleanOptionalAction, help="Add post-process softmax")
    parser.add_argument("-r", "--preserve-aspect-ratio", action=argparse.BooleanOptionalAction,
                        help="preprocess the image with preserve aspect ratio")
    parser.add_argument("--labels", type=str, help="Path to the labels file")
    parser.add_argument("--print-intrinsics", action="store_true",
                        help="Print JSON network_intrinsics then exit")
    return parser.parse_args()


if __name__ == "__main__":
    args = get_args()

    # This must be called before instantiation of Picamera2
    imx500 = IMX500(args.model)
    intrinsics = imx500.network_intrinsics
    if not intrinsics:
        intrinsics = NetworkIntrinsics()
        intrinsics.task = "classification"
    elif intrinsics.task != "classification":
        print("Network is not a classification task", file=sys.stderr)
        exit()

    # Override intrinsics from args
    for key, value in vars(args).items():
        if key == 'labels' and value is not None:
            with open(value, 'r') as f:
                intrinsics.labels = f.read().splitlines()
        elif hasattr(intrinsics, key) and value is not None:
            setattr(intrinsics, key, value)

    # Defaults
    if intrinsics.labels is None:
        with open("assets/imagenet_labels.txt", "r") as f:
            intrinsics.labels = f.read().splitlines()
    intrinsics.update_with_defaults()

    if args.print_intrinsics:
        # Print intrinsics JSON and exit
        print(intrinsics)
        exit()

    picam2 = Picamera2(imx500.camera_num)
    config = picam2.create_preview_configuration(controls={"FrameRate": intrinsics.inference_rate}, buffer_count=12)

    imx500.show_network_fw_progress_bar()
    picam2.start(config, show_preview=False)
    if intrinsics.preserve_aspect_ratio:
        imx500.set_auto_aspect_ratio()

    # Register the callback to parse and draw classification results
    picam2.pre_callback = parse_and_draw_classification_results

    # Main loop
    try:
        while True:
            time.sleep(0.5)
    except KeyboardInterrupt:
        print("Exiting...")
        picam2.close()
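In case it helps to isolate the problem, a stripped-down single-frame check along these lines should also work. It is only a sketch based on the same picamera2 IMX500 calls used above, not the official example; the relative paths assume it is run from ~/picamera2/examples/imx500, and the two-second settle time and one-label-per-line handling are my assumptions.
Code:
#!/usr/bin/env python3
# Minimal single-frame IMX500 check: a sketch based on the demo above,
# not the official example. Paths and the 2 s settle time are assumptions.
import time
import numpy as np
from picamera2 import Picamera2
from picamera2.devices import IMX500

imx500 = IMX500("best_imx_model/network.rpk")  # must be created before Picamera2
picam2 = Picamera2(imx500.camera_num)
picam2.start(picam2.create_preview_configuration(), show_preview=False)
time.sleep(2)  # give the network firmware time to load (assumed sufficient)

with open("assets/marker_labels.txt") as f:
    labels = f.read().splitlines()

metadata = picam2.capture_metadata()
outputs = imx500.get_outputs(metadata)
if outputs is None:
    print("No output tensor in this frame's metadata")
else:
    scores = outputs[0].flatten()
    idx = int(np.argmax(scores))
    name = labels[idx] if 0 <= idx < len(labels) else f"<label_{idx}>"
    print(f"Top class: {name} (raw score {scores[idx]:.3f})")
picam2.close()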
Please let me know if you notice any issues. Thank you.