如何提取yolov8分割的掩码图像

Question

重现步骤
在 K230 上部署 YOLOv8 分割模型进行裂缝分割，想从分割得到的掩码图像中提取裂缝的轮廓，但提取出的掩码图像中带有检测框。请问该如何提取不包含检测框的分割掩码图像？目前的代码如下，应该怎么修改？

        # Fragment of a method (the enclosing def is not shown): crop the
        # detection box out of self.masks, binarize it and save debug images.
        # `det` is unpacked as (x, y, w, h) of one detection.
        x1, y1, w, h = map(int, det)
        # Clamp the box to the display area before slicing the mask buffer.
        x1 = max(0, x1)
        y1 = max(0, y1)
        x2 = min(self.display_size[0], x1 + w)
        y2 = min(self.display_size[1], y1 + h)
        # Take channel 3 of the mask buffer (treated here as alpha) and binarize it.
        # NOTE(review): confirm which channel of self.masks actually holds alpha.
        mask_region = self.masks[0, y1:y2, x1:x2, 3]
        binary_mask = np.array(np.where(mask_region > 0, 255, 0), dtype=np.uint8)      
        binary_bytes = binary_mask.tobytes()
        # Wrap the bytes in an image object (assumed to be single-channel).
        H, W = binary_mask.shape[0], binary_mask.shape[1]
        print(f"H:{H},W:{W}")
        img_mask = image.Image(W, H, image.GRAYSCALE, alloc=image.ALLOC_REF, data=binary_bytes)
        img_mask.save("/data/mask_debug.jpg")       
        # NOTE(review): `edges` is never used and the second save writes img_mask
        # again; this only yields an edge image if find_edges modifies img_mask
        # in place - confirm against the image API.
        edges = img_mask.find_edges(image.EDGE_CANNY)
        img_mask.save("/data/mask_edges.jpg")

期待结果和实际结果

软硬件版本信息

错误日志

尝试解决过程

补充材料

# 自定义YOLOv8分割类
class SegmentationApp(AIBase):
    """YOLOv8 segmentation task with a simple crack-width estimate.

    The class configures Ai2d letterbox preprocessing, converts the raw model
    outputs into boxes/ids/scores plus an RGBA-style mask buffer through
    ``aidemo.yolov8_seg_postprocess``, and draws the mask and a width label
    for every detection.
    """

    # Channel of ``self.masks`` that is binarized to obtain the crack mask.
    # draw_result() wraps the whole buffer as ARGB8888 and, in image mode,
    # treats channels 1..3 as RGB, so channel 0 is taken to be alpha.
    # NOTE(review): confirm against the layout written by
    # aidemo.yolov8_seg_postprocess; set to 3 to restore the previous behavior.
    MASK_ALPHA_CHANNEL = 0

    def __init__(
        self,
        task_type="segment",
        mode="video",
        kmodel_path="",
        labels=None,
        rgb888p_size=None,
        model_input_size=None,
        display_size=None,
        conf_thresh=0.5,
        nms_thresh=0.45,
        mask_thresh=0.5,
        max_boxes_num=50,
        debug_mode=0,
    ):
        """Store the configuration and allocate the mask buffer.

        Args:
            task_type: Task selector; the mask buffer is only allocated for
                ``"segment"``.
            mode: ``"video"`` (mask sized like the display) or ``"image"``
                (mask sized like the AI input frame).
            kmodel_path: Path of the kmodel file.
            labels: Class names; defaults to an empty list.
            rgb888p_size: [width, height] of the frame given to the model;
                defaults to [320, 320].
            model_input_size: [width, height] of the model input; defaults to
                [320, 320].
            display_size: [width, height] of the display; defaults to
                [1920, 1080].
            conf_thresh: Confidence threshold passed to the postprocess.
            nms_thresh: NMS threshold passed to the postprocess.
            mask_thresh: Mask threshold passed to the postprocess.
            max_boxes_num: Stored only; not used by the methods of this class.
            debug_mode: Values > 0 enable ScopedTiming output.
        """
        # None sentinels replace the former mutable list defaults so that no
        # list object is shared between instances.
        labels = [] if labels is None else labels
        rgb888p_size = [320, 320] if rgb888p_size is None else rgb888p_size
        model_input_size = [320, 320] if model_input_size is None else model_input_size
        display_size = [1920, 1080] if display_size is None else display_size

        super().__init__(kmodel_path, model_input_size, rgb888p_size, debug_mode)
        self.task_type = task_type
        self.mode = mode
        # Model path
        self.kmodel_path = kmodel_path
        # Segmentation class labels
        self.labels = labels
        self.class_num = len(labels)
        # Resolution of the frame the sensor hands to the AI pipeline; in
        # video mode the width is aligned up to a multiple of 16.
        if mode == "video":
            self.rgb888p_size = [ALIGN_UP(rgb888p_size[0], 16), rgb888p_size[1]]
        else:
            self.rgb888p_size = [rgb888p_size[0], rgb888p_size[1]]
        # Model input resolution
        self.model_input_size = model_input_size
        # Display resolution (width aligned up to a multiple of 16)
        self.display_size = [ALIGN_UP(display_size[0], 16), display_size[1]]
        # Thresholds
        self.conf_thresh = conf_thresh
        self.nms_thresh = nms_thresh
        self.mask_thresh = mask_thresh
        self.max_boxes_num = max_boxes_num

        self.debug_mode = debug_mode

        self.scale = 1.0
        self.colors = get_colors(len(self.labels))
        self.masks = None

        self.sock = None  # socket connection object
        self.last_heartbeat = time.ticks_ms()

        # Mask buffer filled by postprocess(): (1, height, width, 4) uint8.
        if self.task_type == "segment":
            if self.mode == "image":
                self.masks = np.zeros((1, self.rgb888p_size[1], self.rgb888p_size[0], 4), dtype=np.uint8)
            elif self.mode == "video":
                self.masks = np.zeros((1, self.display_size[1], self.display_size[0], 4), dtype=np.uint8)
        # Ai2d instance used for model preprocessing
        self.ai2d = Ai2d(self.debug_mode)
        # Input/output format and dtype of Ai2d
        self.ai2d.set_ai2d_dtype(nn.ai2d_format.NCHW_FMT, nn.ai2d_format.NCHW_FMT, np.uint8, np.uint8)

    def config_preprocess(self, input_image_size=None):
        """Configure Ai2d with letterbox padding followed by a bilinear resize.

        Args:
            input_image_size: Optional [width, height] of the Ai2d input;
                defaults to ``self.rgb888p_size``.
        """
        with ScopedTiming("set preprocess config", self.debug_mode > 0):
            ai2d_input_size = input_image_size if input_image_size else self.rgb888p_size

            # NOTE(review): the padding is always derived from
            # self.rgb888p_size, even when input_image_size overrides the
            # Ai2d input shape - confirm this is intended.
            top, bottom, left, right, _ = letterbox_pad_param(self.rgb888p_size, self.model_input_size)
            self.ai2d.pad([0, 0, 0, 0, top, bottom, left, right], 0, [128, 128, 128])
            self.ai2d.resize(nn.interp_method.tf_bilinear, nn.interp_mode.half_pixel)
            # build() takes the input shape and the output shape
            self.ai2d.build(
                [1, 3, ai2d_input_size[1], ai2d_input_size[0]],
                [1, 3, self.model_input_size[1], self.model_input_size[0]],
            )

    def postprocess(self, results):
        """Run the YOLOv8 segmentation postprocess and fill ``self.masks``.

        Args:
            results: Raw model outputs; ``results[0][0]`` is transposed and
                passed together with ``results[1][0]`` to aidemo.

        Returns:
            Whatever ``aidemo.yolov8_seg_postprocess`` returns; draw_result()
            reads it as (dets, ids, scores).

        Raises:
            ValueError: If ``self.mode`` is neither "image" nor "video".
        """
        with ScopedTiming("postprocess", self.debug_mode > 0):
            new_result = results[0][0].transpose()
            # The mask is rendered at frame size in image mode and at display
            # size in video mode, matching the buffer allocated in __init__.
            if self.mode == "image":
                out_hw = [self.rgb888p_size[1], self.rgb888p_size[0]]
            elif self.mode == "video":
                out_hw = [self.display_size[1], self.display_size[0]]
            else:
                raise ValueError("unsupported mode: " + str(self.mode))

            return aidemo.yolov8_seg_postprocess(
                new_result.copy(),
                results[1][0],
                [self.rgb888p_size[1], self.rgb888p_size[0]],
                [self.model_input_size[1], self.model_input_size[0]],
                out_hw,
                len(self.labels),
                self.conf_thresh,
                self.nms_thresh,
                self.mask_thresh,
                self.masks,
            )

    def calculate_crack_width_2(self, det, pixel_to_mm=0.007625):
        """Estimate the crack width inside one detection box.

        The box is cropped from the mask buffer, binarized, and the largest
        blob is located; the shorter side of that blob's bounding rectangle
        multiplied by ``pixel_to_mm`` is returned.

        Args:
            det: Detection box as (x, y, w, h) in mask-buffer coordinates.
            pixel_to_mm: Millimeters per pixel; the default is an example
                value and should be replaced by a calibrated one.

        Returns:
            The estimated width, or 0 when the box lies outside the mask or
            no blob is found.
        """
        x, y, w, h = map(lambda v: int(round(v, 0)), det)

        # Clamp against the real mask size so image mode (frame-sized mask)
        # and video mode (display-sized mask) are both handled. The far edges
        # come from the unclamped origin so a box that starts off-screen is
        # not stretched.
        mask_h, mask_w = self.masks.shape[1], self.masks.shape[2]
        x1 = max(0, x)
        y1 = max(0, y)
        x2 = min(mask_w, x + w)
        y2 = min(mask_h, y + h)
        if x2 <= x1 or y2 <= y1:
            # Empty region: a zero-sized image cannot be created.
            print("未检测到裂缝区域")
            return 0

        # Binarize the selected channel: every non-zero pixel becomes 255.
        mask_data = self.masks[0, y1:y2, x1:x2, self.MASK_ALPHA_CHANNEL]
        roi_uint8 = np.array(np.where(mask_data > 0, 255, 0), dtype=np.uint8)
        roi_bytes = roi_uint8.tobytes()

        # Wrap the bytes as a single-channel image.
        roi_h, roi_w = mask_data.shape[0], mask_data.shape[1]
        print(f"H:{roi_h},W:{roi_w}")
        img_mask = image.Image(roi_w, roi_h, image.GRAYSCALE, alloc=image.ALLOC_REF, data=roi_bytes)

        img_mask.save("/data/mask_debug.jpg")

        # Locate the crack regions in the binary mask.
        blobs = img_mask.find_blobs([(127, 255)], area_threshold=100, merge=True)
        if not blobs:
            print("未检测到裂缝区域")
            return 0

        blob = max(blobs, key=lambda b: b.area())
        print("blob.rect():", blob.rect())          # (x, y, w, h)
        print("blob.area():", blob.area())          # blob area
        print("blob.pixels():", blob.pixels())
        _, _, w_blob, h_blob = blob.rect()
        # The shorter side of the bounding rectangle is used as the width.
        return min(w_blob, h_blob) * pixel_to_mm

    def draw_result(self, res, img):
        """Draw the mask and a width label for every detection.

        Args:
            res: Postprocess result read as (dets, ids, scores).
            img: OSD image that is cleared and, in video mode, drawn onto.
        """
        with ScopedTiming("draw result", self.debug_mode > 0):
            if self.mode == "video":
                # The overlay is cleared on every frame, with or without detections.
                img.clear()
                if res[0]:
                    mask_img = image.Image(self.display_size[0], self.display_size[1], image.ARGB8888, alloc=image.ALLOC_REF, data=self.masks)
                    img.copy_from(mask_img)
                    dets, ids = res[0], res[1]
                    for i, det in enumerate(dets):
                        x1, y1, _w, _h = map(lambda v: int(round(v, 0)), det)
                        width = self.calculate_crack_width_2(det)
                        print("width:",width)
                        display_text = f" {self.labels[int(ids[i])]} Width: {width}mm"
                        img.draw_string_advanced(x1, y1 - 50, 32, display_text, color=self.colors[int(ids[i])])
            elif self.mode == "image":
                if res[0]:
                    mask_rgb = self.masks[0, :, :, 1:4]
                    mask_img = image.Image(self.rgb888p_size[0], self.rgb888p_size[1], image.RGB888, alloc=image.ALLOC_REF, data=mask_rgb.copy())
                    dets, ids, scores = res[0], res[1], res[2]
                    for i, det in enumerate(dets):
                        x, y, _w, _h = map(lambda v: int(round(v, 0)), det)
                        # calculate_crack_width_1 is not defined in this class;
                        # calculate_crack_width_2 clamps against the mask size
                        # and therefore works for the frame-sized mask as well.
                        width = self.calculate_crack_width_2(det)
                        print("width:",width)
                        display_text = f" {self.labels[int(ids[i])]} {round(scores[i], 2)} Width: {width}mm"
                        mask_img.draw_string_advanced(x, y - 50, 32, display_text, color=self.colors[int(ids[i])])
                    mask_img.compress_for_ide()
                else:
                    img.clear()

    def nms(self, boxes, scores, thresh):
        """Non-maximum suppression over axis-aligned boxes.

        Args:
            boxes: 2-D array whose columns 0..3 are x1, y1, x2, y2.
            scores: Per-box scores used to rank the boxes.
            thresh: Boxes whose IoU with a kept box is >= thresh are dropped.

        Returns:
            List of indices into ``boxes`` that are kept, best score first.
        """
        x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
        areas = (x2 - x1 + 1) * (y2 - y1 + 1)
        order = np.argsort(scores, axis=0)[::-1]
        keep = []
        while order.size > 0:
            i = order[0]
            keep.append(i)
            # Gather the remaining candidates element by element (no reliance
            # on integer-array indexing).
            cand_x1 = np.array([x1[j] for j in order])
            cand_y1 = np.array([y1[j] for j in order])
            cand_x2 = np.array([x2[j] for j in order])
            cand_y2 = np.array([y2[j] for j in order])
            cand_areas = np.array([areas[j] for j in order])

            xx1 = np.maximum(x1[i], cand_x1)
            yy1 = np.maximum(y1[i], cand_y1)
            xx2 = np.minimum(x2[i], cand_x2)
            yy2 = np.minimum(y2[i], cand_y2)
            w = np.maximum(0.0, xx2 - xx1 + 1)
            h = np.maximum(0.0, yy2 - yy1 + 1)
            inter = w * h
            ovr = inter / (areas[i] + cand_areas - inter)

            # Position 0 is the box just kept: always drop it so the loop
            # terminates even for degenerate boxes or thresh > 1.
            survivors = [order[k] for k, iou in enumerate(ovr) if k > 0 and iou < thresh]
            # uint16 instead of uint8: indices above 255 must not wrap around.
            order = np.array(survivors, dtype=np.uint16)
        return keep

if __name__=="__main__":
    # Entry point: set up the display pipeline and the segmentation app, then
    # run inference frame by frame until an exception stops the loop.
    print("裂缝检测启动...")
    # Display mode: "hdmi" or "lcd"; the display size follows from it.
    display_mode="lcd"
    rgb888p_size=[640,640]
    if display_mode=="hdmi":
        display_size=[1920,1080]
    else:
        display_size=[800,480]
    # Model configuration
    kmodel_path="/data/best.kmodel"
    labels = ["Crack"]
    confidence_threshold = 0.5
    nms_threshold=0.45
    mask_threshold=0.5
    model_input_size=[320,320]

    # Initialize the PipeLine
    pl=PipeLine(rgb888p_size=rgb888p_size,display_size=display_size,display_mode=display_mode)
    pl.create()
    # Initialize the YOLOv8 segmentation instance (this line was indented by
    # three spaces before, which made the whole script fail to parse).
    yolo=SegmentationApp(task_type="segment",mode="video",kmodel_path=kmodel_path,labels=labels,rgb888p_size=rgb888p_size,model_input_size=model_input_size,display_size=display_size,conf_thresh=confidence_threshold,nms_thresh=nms_threshold,mask_thresh=mask_threshold,max_boxes_num=50,debug_mode=0)
    yolo.config_preprocess()
    check = 0  # frame counter
    try:
        while True:
            os.exitpoint()
            with ScopedTiming("total",1):
                # Per-frame inference
                check = check+1
                img=pl.get_frame()
                res=yolo.run(img)
                yolo.draw_result(res,pl.osd_img)
                pl.show_image()
                time.sleep(0.1) # 100 ms
                gc.collect()
    except Exception as e:
        print("发生错误:", e)
    finally:
        # Always release the model and the pipeline.
        yolo.deinit()
        pl.destroy()
        print("系统已关闭")

Wy001 · Answer

你是用 YOLO 库实现的吗？

如何提取yolov8分割的掩码图像

1 Answer