AI model on multiple cameras

Problem Description

I want to run a person detection AI model on the cameras connected to CSI0, CSI1, and CSI2.
I followed the triple-camera example, but I have not been able to get the person detection model running.

Steps to Reproduce

Run the code.
Check the stream output in the CanMV IDE.

Hardware Board

LCKFB

Software Version

CanMV_K230_LCKFB_micropython_v1.4-0-g6cce59c_nncase_v2.9.0

Other Information

import time, os, sys, ujson, gc, math
from libs.AIBase import AIBase
from libs.AI2D import Ai2d
from libs.Utils import *
from media.sensor import *
from media.display import *
from media.media import *
import nncase_runtime as nn
import ulab.numpy as np
import image
import aicube

sensor0 = None
sensor1 = None
sensor2 = None

class PersonDetectionApp(AIBase):
    def __init__(self,kmodel_path,model_input_size,labels,anchors,confidence_threshold=0.2,nms_threshold=0.5,nms_option=False,strides=[8,16,32],rgb888p_size=[224,224],display_size=[1920,1080],debug_mode=0):
        super().__init__(kmodel_path,model_input_size,rgb888p_size,debug_mode)
        self.kmodel_path=kmodel_path
        # model input resolution
        self.model_input_size=model_input_size
        # labels
        self.labels=labels
        # detection anchors
        self.anchors=anchors
        # feature map downsampling strides
        self.strides=strides
        # confidence threshold
        self.confidence_threshold=confidence_threshold
        # NMS threshold
        self.nms_threshold=nms_threshold
        self.nms_option=nms_option
        # resolution of the frames the sensor feeds to the AI (width aligned to 16)
        self.rgb888p_size=[ALIGN_UP(rgb888p_size[0],16),rgb888p_size[1]]
        # display resolution (width aligned to 16)
        self.display_size=[ALIGN_UP(display_size[0],16),display_size[1]]
        self.debug_mode=debug_mode
        # Ai2d instance used for model preprocessing
        self.ai2d=Ai2d(debug_mode)
        # set the Ai2d input/output format and data type
        self.ai2d.set_ai2d_dtype(nn.ai2d_format.NCHW_FMT,nn.ai2d_format.NCHW_FMT,np.uint8, np.uint8)

    # configure preprocessing: pad and resize are used here. Ai2d supports crop/shift/pad/resize/affine; see /sdcard/app/libs/AI2D.py for details
    def config_preprocess(self,input_image_size=None):
        with ScopedTiming("set preprocess config",self.debug_mode > 0):
            # initialize the ai2d preprocessing config; it defaults to the size the sensor feeds to the AI, and can be overridden via input_image_size
            ai2d_input_size=input_image_size if input_image_size else self.rgb888p_size
            top,bottom,left,right,_=center_pad_param(self.rgb888p_size,self.model_input_size)
            self.ai2d.pad([0,0,0,0,top,bottom,left,right], 0, [0,0,0])
            self.ai2d.resize(nn.interp_method.tf_bilinear, nn.interp_mode.half_pixel)
            self.ai2d.build([1,3,ai2d_input_size[1],ai2d_input_size[0]],[1,3,self.model_input_size[1],self.model_input_size[0]])

    # custom post-processing for this task
    def postprocess(self,results):
        with ScopedTiming("postprocess",self.debug_mode > 0):
            # use the aicube anchor-based detection post-processing interface anchorbasedet_post_process
            dets = aicube.anchorbasedet_post_process(results[0], results[1], results[2], self.model_input_size, self.rgb888p_size, self.strides, len(self.labels), self.confidence_threshold, self.nms_threshold, self.anchors, self.nms_option)
            return dets

# draw detection boxes on a full-screen ARGB8888 overlay, scaling coordinates
# from the AI input resolution (rgb888p_size) to the display resolution
def draw_result(dets):
    global display_size, labels

    img = image.Image(display_size[0], display_size[1], image.ARGB8888)

    img.clear()
    if dets:
        for det_box in dets:
            x1, y1, x2, y2 = det_box[2],det_box[3],det_box[4],det_box[5]
            w = float(x2 - x1) * display_size[0] // rgb888p_size[0]
            h = float(y2 - y1) * display_size[1] // rgb888p_size[1]
            x1 = int(x1 * display_size[0] // rgb888p_size[0])
            y1 = int(y1 * display_size[1] // rgb888p_size[1])
            x2 = int(x2 * display_size[0] // rgb888p_size[0])
            y2 = int(y2 * display_size[1] // rgb888p_size[1])
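            # heuristic filters: skip boxes that are too short or that touch the left/right edge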
            if (h<(0.1*display_size[0])):
                continue
            if (w<(0.25*display_size[0]) and ((x1<(0.03*display_size[0])) or (x2>(0.97*display_size[0])))):
                continue
            if (w<(0.15*display_size[0]) and ((x1<(0.01*display_size[0])) or (x2>(0.99*display_size[0])))):
                continue
            img.draw_rectangle(x1, y1, int(w), int(h), color=(255, 0, 255, 0), thickness=2)
            img.draw_string_advanced(x1, y1 - 50, 32, " " + labels[det_box[0]] + " " + str(round(det_box[1], 2)), color=(255, 0, 255, 0))

    return img

try:
    rgb888p_size = [1920, 1080]
    kmodel_path="/sdcard/examples/kmodel/person_detect_yolov5n.kmodel"
    confidence_threshold = 0.2
    nms_threshold = 0.6
    labels = ["person"]
    anchors = [10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198, 373, 326]
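    # the anchors above are the YOLOv5 defaults: three (w, h) pairs per stride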
    
    print("camera_test")

    # sensor0
    sensor0 = Sensor(id = 0, width = 1920, height = 1080, fps = 30)
    sensor0.reset()
    sensor0.set_framesize(width = 960, height = 540)
    sensor0.set_pixformat(Sensor.YUV420SP)
    sensor0.set_framesize(width = 1920, height = 1080, chn=CAM_CHN_ID_2)
    sensor0.set_pixformat(PIXEL_FORMAT_RGB_888_PLANAR, chn=CAM_CHN_ID_2)
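    # note: only sensor0 is given this extra RGB888P channel (CAM_CHN_ID_2),
    # so only sensor0's frames can be fed to the kmodel below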
    bind_info = sensor0.bind_info(x = 0, y = 0)
    Display.bind_layer(**bind_info, layer = Display.LAYER_VIDEO1)

    # sensor1
    sensor1 = Sensor(id = 1, width = 1920, height = 1080, fps = 30)
    sensor1.reset()
    sensor1.set_framesize(width = 960, height = 540)
    sensor1.set_pixformat(Sensor.YUV420SP)
    bind_info = sensor1.bind_info(x = 960, y = 0)
    Display.bind_layer(**bind_info, layer = Display.LAYER_VIDEO2)
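    # sensor1 is display-only: its frames go straight to LAYER_VIDEO2 and no AI channel is configured on it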

    # sensor2
    sensor2 = Sensor(id = 2, width = 1920, height = 1080, fps = 30)
    sensor2.reset()
    sensor2.set_framesize(width = 960, height = 540)
    sensor2.set_pixformat(Sensor.RGB888)
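    # sensor2 is not bound to a display layer; its frames are shown manually in the main loop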

    Display.init(Display.LT9611, to_ide = True)
    display_size=[Display.width(),Display.height()]
    MediaManager.init()

    # with multiple sensors, run() only needs to be called once
    sensor0.run()
    
    person_det=PersonDetectionApp(kmodel_path,model_input_size=[640,640],labels=labels,anchors=anchors,confidence_threshold=confidence_threshold,nms_threshold=nms_threshold,nms_option=False,strides=[8,16,32],rgb888p_size=rgb888p_size,display_size=display_size,debug_mode=0)
    person_det.config_preprocess()
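    # the preprocess pads the 1920x1080 AI frames and resizes them to the model's 640x640 input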

    while True:
        os.exitpoint()
        
        # run detection on sensor0's RGB888P frame and overlay the result
        cur_frame = sensor0.snapshot(chn=CAM_CHN_ID_2)
        res = person_det.run(cur_frame)
        osd_img = draw_result(res)
        Display.show_image(osd_img, 0, 0, Display.LAYER_OSD3)

        # push sensor2's preview to the lower half of the display
        img = sensor2.snapshot()
        Display.show_image(img, x = 0, y = 540)
        
except KeyboardInterrupt as e:
    print("user stop: ", e)
except BaseException as e:
    sys.print_exception(e)
finally:
    # with multiple sensors, stop() must be called on each one
    if sensor0 is not None:
        sensor0.stop()
    if sensor1 is not None:
        sensor1.stop()
    if sensor2 is not None:        
        sensor2.stop()
    # or call Sensor.deinit()
    # Sensor.deinit()

    # deinit display
    Display.deinit()

    os.exitpoint(os.EXITPOINT_ENABLE_SLEEP)
    time.sleep_ms(100)
    # deinit media buffer
    MediaManager.deinit()

1 Answer

Could you please clarify your specific requirement? Do you need human detection running simultaneously on all three cameras, or is it sufficient to have three cameras activated but only running human detection on one of them?
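
If the goal is to run detection on all three streams simultaneously, the loop would need an RGB888P AI channel per sensor. Below is a minimal, untested sketch of that idea; it assumes sensor1 and sensor2 can each be configured with their own CAM_CHN_ID_2 channel exactly the way sensor0 is in the post, that the media buffer is large enough for three 1080p RGB888P channels, and that Display.LAYER_OSD0/1/2 are free for the three overlays.

# untested sketch: run the same detector on each sensor in turn.
# assumes each sensor was also configured before sensor0.run(), e.g. for sensor1:
#   sensor1.set_framesize(width = 1920, height = 1080, chn = CAM_CHN_ID_2)
#   sensor1.set_pixformat(PIXEL_FORMAT_RGB_888_PLANAR, chn = CAM_CHN_ID_2)
sensors = [sensor0, sensor1, sensor2]
osd_layers = [Display.LAYER_OSD0, Display.LAYER_OSD1, Display.LAYER_OSD2]
while True:
    os.exitpoint()
    for s, layer in zip(sensors, osd_layers):
        frame = s.snapshot(chn=CAM_CHN_ID_2)  # RGB888P frame for the model
        dets = person_det.run(frame)
        osd = draw_result(dets)
        # draw_result scales boxes to the full display; with tiled previews the
        # coordinates would still need offsetting to the tile of this stream
        Display.show_image(osd, 0, 0, layer)

Expect the frame rate to drop roughly by a factor of three, since the three inference passes run serially on the KPU.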