The code below is for performing deep learning inference using the standard Raspberry Pi Camera Module, not the AI Camera. I modified this code for the AI Camera as shown in the second listing, but it kept throwing errors and didn't work. If there is a fundamental error in this code, please point it out.
The current program is located in the file /home/meiden/picamera2/examples/imx500/imx500_classification_custom_demo.py.
Additionally, the model is located in /home/meiden/picamera2/examples/imx500/best_imx_model.
The trained labels are stored in the file /home/meiden/picamera2/examples/imx500/assets/marker_labels.txt and contain the classes blue, red, green, yellow (a sample of the file is shown below).
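For reference, here is what I expect marker_labels.txt to contain: one label per line, since the script loads it with f.read().splitlines().
Code:
blue
red
green
yellow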
Currently, I enter the directory with `cd ~/picamera2/examples/imx500` in the terminal and start the program with this command:
python3 imx500_classification_custom_demo.py --model best_imx_model/network.rpk --labels assets/marker_labels.txt --softmax
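As a sanity check, the script's own --print-intrinsics flag (defined in get_args() in the second listing) should print the resolved network intrinsics and exit before the camera is started:
Code:
python3 imx500_classification_custom_demo.py --model best_imx_model/network.rpk --print-intrinsics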
Code:
import cv2
from ultralytics import YOLO
from picamera2 import Picamera2
import time

# Load the model
model = YOLO("/home/meiden/WRO/train26/weights/best.pt")
class_names = model.names

# Initialise the camera
picam2 = Picamera2()
picam2.configure(picam2.create_still_configuration(main={"size": (1477, 1108)}))
picam2.start()
time.sleep(1)

# Colour label mapping
color_map = {"green": 0, "red": 1, "yellow": 2, "blue": 3}

print("Capturing image and running inference...")

# Capture
img_rgb = picam2.capture_array()
img_bgr = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2BGR)

# Mask out unneeded regions with white
height, width, _ = img_bgr.shape
img_bgr[0:800, :] = (255, 255, 255)
img_bgr[:, 1350:] = (255, 255, 255)
img_bgr[:, :700] = (255, 255, 255)
img_bgr[0:900, width - 283:width] = (255, 255, 255)

# Inference
results = model(img_bgr)
det = []
for r in results:
    for b in r.boxes:
        x1, y1, x2, y2 = map(int, b.xyxy[0])
        cls_id = int(b.cls[0])
        conf = float(b.conf[0])
        cls_name = class_names[cls_id]
        cx = (x1 + x2) // 2
        cy = (y1 + y2) // 2

        # Visualisation
        cv2.rectangle(img_bgr, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.circle(img_bgr, (cx, cy), 10, (0, 0, 255), -1)
        label = f"{cls_name} {conf:.2f}"
        cv2.putText(img_bgr, label, (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)
        cv2.putText(img_bgr, label, (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1)

        if cls_name in color_map and conf > 0.40:
            det.append({"cls": cls_name, "conf": conf, "cx": cx})

# Sort detections from right to left
if det:
    det.sort(key=lambda o: o["cx"], reverse=True)
    codes = [str(color_map[o["cls"]]) for o in det]
    send = "".join(codes)
else:
    send = "00123"

print(f"Detected: {send}")

# Save the image
cv2.imwrite("/home/meiden/WRO/output.jpg", img_bgr)
print("Saved: /home/meiden/WRO/output.jpg")

# Show the image
cv2.imshow("Detection Result", img_bgr)
cv2.waitKey(0)
cv2.destroyAllWindows()
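To make the intended post-processing easier to follow, here is the right-to-left sort and digit-encoding step from the script above as a self-contained sketch (the detections are made-up values, not real YOLO output):
Code:
# Sketch of the sorting/encoding step, with hypothetical detections
# standing in for real YOLO results.
color_map = {"green": 0, "red": 1, "yellow": 2, "blue": 3}

det = [  # hypothetical detections: class name, confidence, box centre x
    {"cls": "red", "conf": 0.91, "cx": 120},
    {"cls": "blue", "conf": 0.88, "cx": 940},
    {"cls": "green", "conf": 0.73, "cx": 510},
]

# Sort right to left (largest centre x first), then map each class to its digit
det.sort(key=lambda o: o["cx"], reverse=True)
send = "".join(str(color_map[o["cls"]]) for o in det)
print(send)  # -> "301" (blue, green, red)

And here is my modified version for the AI Camera:
Code: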
#!/usr/bin/env python3
import argparse
import sys
import time
from typing import List

import cv2
import numpy as np

from picamera2 import CompletedRequest, MappedArray, Picamera2
from picamera2.devices import IMX500
from picamera2.devices.imx500 import NetworkIntrinsics
from picamera2.devices.imx500.postprocess import softmax

last_detections = []
LABELS = None
intrinsics = None
imx500 = None


class Classification:
    def __init__(self, idx: int, score: float):
        """Create a Classification object, recording the idx and score."""
        self.idx = idx
        self.score = score


def get_label(request: CompletedRequest, idx: int) -> str:
    """Retrieve the label corresponding to the classification index, robust for arbitrary label counts."""
    global LABELS, intrinsics, imx500
    if LABELS is None:
        # Use intrinsics labels if present
        LABELS = intrinsics.labels if (intrinsics is not None and intrinsics.labels is not None) else []
        # Try to detect model output tensor size and adapt if needed
        output_tensor_size = None
        try:
            output_shapes = imx500.get_output_shapes(request.get_metadata())
            if output_shapes:
                output_tensor_size = output_shapes[0][0]
        except Exception:
            output_tensor_size = None
        if output_tensor_size is not None:
            if output_tensor_size == len(LABELS) + 1:
                # If model has one extra class (e.g. background at index 0), insert placeholder
                LABELS = ["<background>"] + LABELS
            elif output_tensor_size != len(LABELS):
                # Warn but continue; we'll guard index access later
                print(f"Warning: model output size ({output_tensor_size}) != labels length ({len(LABELS)})",
                      file=sys.stderr)
    # Safety: return placeholder if idx out of range
    if LABELS is None or idx < 0 or idx >= len(LABELS):
        return f"<label_{idx}>"
    return LABELS[idx]


def parse_and_draw_classification_results(request: CompletedRequest):
    """Analyse and draw the classification results in the output tensor."""
    try:
        results = parse_classification_results(request)
        draw_classification_results(request, results)
    except Exception as e:
        # Do not crash the main loop on unexpected error in callback
        print(f"Error in pre_callback: {e}", file=sys.stderr)


def parse_classification_results(request: CompletedRequest) -> List[Classification]:
    """Parse the output tensor into the classification results above the threshold."""
    global last_detections, intrinsics, imx500
    np_outputs = imx500.get_outputs(request.get_metadata())
    if np_outputs is None:
        return last_detections
    np_output = np_outputs[0]
    np_output = np_output.flatten()
    if intrinsics and getattr(intrinsics, "softmax", False):
        np_output = softmax(np_output)
    num_classes = len(np_output)
    top_k = min(3, num_classes)
    top_indices = np.argsort(-np_output)[:top_k]
    last_detections = [Classification(int(index), float(np_output[int(index)])) for index in top_indices]
    return last_detections


def draw_classification_results(request: CompletedRequest, results: List[Classification], stream: str = "main"):
    """Draw the classification results for this request onto the ISP output."""
    with MappedArray(request, stream) as m:
        if intrinsics and intrinsics.preserve_aspect_ratio:
            # Drawing ROI box
            b_x, b_y, b_w, b_h = imx500.get_roi_scaled(request)
            color = (255, 0, 0)  # red
            cv2.putText(m.array, "ROI", (b_x + 5, b_y + 15), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)
            cv2.rectangle(m.array, (b_x, b_y), (b_x + b_w, b_y + b_h), (255, 0, 0, 0))
            text_left, text_top = b_x, b_y + 20
        else:
            text_left, text_top = 0, 0
        # Drawing labels (in the ROI box if it exists)
        for index, result in enumerate(results):
            label = get_label(request, idx=result.idx)
            text = f"{label}: {result.score:.3f}"
            # Calculate text size and position
            (text_width, text_height), baseline = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
            text_x = text_left + 5
            text_y = text_top + 15 + index * 20
            # Create a copy of the array to draw the background with opacity
            overlay = m.array.copy()
            # Draw the background rectangle on the overlay
            cv2.rectangle(overlay, (text_x, text_y - text_height), (text_x + text_width, text_y + baseline),
                          (255, 255, 255), cv2.FILLED)
            alpha = 0.3
            cv2.addWeighted(overlay, alpha, m.array, 1 - alpha, 0, m.array)
            # Draw text on top of the background
            cv2.putText(m.array, text, (text_x, text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1)


def get_args():
    """Parse command line arguments."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", type=str, help="Path of the model",
                        default="/usr/share/imx500-models/imx500_network_mobilenet_v2.rpk")
    parser.add_argument("--fps", type=int, help="Frames per second")
    parser.add_argument("-s", "--softmax", action=argparse.BooleanOptionalAction, help="Add post-process softmax")
    parser.add_argument("-r", "--preserve-aspect-ratio", action=argparse.BooleanOptionalAction,
                        help="preprocess the image with preserve aspect ratio")
    parser.add_argument("--labels", type=str, help="Path to the labels file")
    parser.add_argument("--print-intrinsics", action="store_true",
                        help="Print JSON network_intrinsics then exit")
    return parser.parse_args()


if __name__ == "__main__":
    args = get_args()

    # This must be called before instantiation of Picamera2
    imx500 = IMX500(args.model)
    intrinsics = imx500.network_intrinsics
    if not intrinsics:
        intrinsics = NetworkIntrinsics()
        intrinsics.task = "classification"
    elif intrinsics.task != "classification":
        print("Network is not a classification task", file=sys.stderr)
        exit()

    # Override intrinsics from args
    for key, value in vars(args).items():
        if key == 'labels' and value is not None:
            with open(value, 'r') as f:
                intrinsics.labels = f.read().splitlines()
        elif hasattr(intrinsics, key) and value is not None:
            setattr(intrinsics, key, value)

    # Defaults
    if intrinsics.labels is None:
        with open("assets/imagenet_labels.txt", "r") as f:
            intrinsics.labels = f.read().splitlines()
    intrinsics.update_with_defaults()

    if args.print_intrinsics:
        # Print intrinsics JSON and exit
        print(intrinsics)
        exit()

    picam2 = Picamera2(imx500.camera_num)
    config = picam2.create_preview_configuration(controls={"FrameRate": intrinsics.inference_rate}, buffer_count=12)

    imx500.show_network_fw_progress_bar()
    picam2.start(config, show_preview=False)
    if intrinsics.preserve_aspect_ratio:
        imx500.set_auto_aspect_ratio()

    # Register the callback to parse and draw classification results
    picam2.pre_callback = parse_and_draw_classification_results

    # Main loop
    try:
        while True:
            time.sleep(0.5)
    except KeyboardInterrupt:
        print("Exiting...")
        picam2.close()
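In case it helps to isolate the problem, a stripped-down single-frame check along these lines should also work. It is only a sketch based on the same picamera2 IMX500 calls used above, not the official example; the relative paths assume it is run from ~/picamera2/examples/imx500, and the two-second settle time and one-label-per-line handling are my assumptions.
Code:
#!/usr/bin/env python3
# Minimal single-frame IMX500 check: a sketch based on the demo above,
# not the official example. Paths and the 2 s settle time are assumptions.
import time
import numpy as np
from picamera2 import Picamera2
from picamera2.devices import IMX500

imx500 = IMX500("best_imx_model/network.rpk")  # must be created before Picamera2
picam2 = Picamera2(imx500.camera_num)
picam2.start(picam2.create_preview_configuration(), show_preview=False)
time.sleep(2)  # give the network firmware time to load (assumed sufficient)

with open("assets/marker_labels.txt") as f:
    labels = f.read().splitlines()

metadata = picam2.capture_metadata()
outputs = imx500.get_outputs(metadata)
if outputs is None:
    print("No output tensor in this frame's metadata")
else:
    scores = outputs[0].flatten()
    idx = int(np.argmax(scores))
    name = labels[idx] if 0 <= idx < len(labels) else f"<label_{idx}>"
    print(f"Top class: {name} (raw score {scores[idx]:.3f})")
picam2.close()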
Please let me know if you notice any issues. Thank you.