Steps to reproduce
I deployed a YOLOv8 segmentation model on the K230 for crack segmentation and want to extract the crack contour from the segmentation mask, but the extracted mask image still contains the detection box. How can I get the segmented mask without the detection box? My code is below (a possible workaround I have been considering is sketched after the snippet); how should I modify it?
x1, y1, w, h = map(int, det)
x1 = max(0, x1)
y1 = max(0, y1)
x2 = min(self.display_size[0], x1 + w)
y2 = min(self.display_size[1], y1 + h)
# Extract the alpha channel and binarize it
mask_region = self.masks[0, y1:y2, x1:x2, 3]
binary_mask = np.array(np.where(mask_region > 0, 255, 0), dtype=np.uint8)
binary_bytes = binary_mask.tobytes()
# Convert to an image object (assumed to be single-channel)
H, W = binary_mask.shape[0], binary_mask.shape[1]
print(f"H:{H},W:{W}")
img_mask = image.Image(W, H, image.GRAYSCALE, alloc=image.ALLOC_REF, data=binary_bytes)
img_mask.save("/data/mask_debug.jpg")
edges = img_mask.find_edges(image.EDGE_CANNY)
img_mask.save("/data/mask_edges.jpg")
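One workaround I have been considering but have not verified: if the rectangle outline is being drawn into self.masks by the postprocess call, insetting the crop by a few pixels should keep the outline out of the extracted region. The margin value below is only a guess at the outline thickness and would need tuning; everything else reuses the names from the snippet above.

# Sketch of a possible workaround (unverified): shrink the crop so a thin box
# outline drawn along the detection rectangle's border falls outside the ROI.
margin = 4  # assumed outline thickness in pixels, not taken from my current code
x1, y1, w, h = map(int, det)
x1_in = max(0, x1 + margin)
y1_in = max(0, y1 + margin)
x2_in = min(self.display_size[0], x1 + w - margin)
y2_in = min(self.display_size[1], y1 + h - margin)
mask_region = self.masks[0, y1_in:y2_in, x1_in:x2_in, 3]
binary_mask = np.array(np.where(mask_region > 0, 255, 0), dtype=np.uint8)

If the box is filled rather than a thin outline, this will not help, and the mask would have to be rebuilt from the raw model output instead.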
Expected and actual results
Software and hardware versions
Error log
Troubleshooting attempts
Supplementary material
# Custom YOLOv8 segmentation class
class SegmentationApp(AIBase):
    def __init__(self, task_type="segment", mode="video", kmodel_path="", labels=[], rgb888p_size=[320,320], model_input_size=[320,320], display_size=[1920,1080], conf_thresh=0.5, nms_thresh=0.45, mask_thresh=0.5, max_boxes_num=50, debug_mode=0):
        # if task_type not in ["classify","detect","segment"]:
        #     print("Please select the correct task_type parameter, including 'classify', 'detect', 'segment'.")
        #     return
        super().__init__(kmodel_path, model_input_size, rgb888p_size, debug_mode)
        self.task_type = task_type
        self.mode = mode
        # Model path
        self.kmodel_path = kmodel_path
        # Segmentation class labels
        self.labels = labels
        self.class_num = len(labels)
        # Resolution of the frames the sensor feeds to the AI
        if mode == "video":
            self.rgb888p_size = [ALIGN_UP(rgb888p_size[0], 16), rgb888p_size[1]]
        else:
            self.rgb888p_size = [rgb888p_size[0], rgb888p_size[1]]
        # Model input resolution
        self.model_input_size = model_input_size
        # Display resolution
        self.display_size = [ALIGN_UP(display_size[0], 16), display_size[1]]
        # Confidence threshold
        self.conf_thresh = conf_thresh
        # NMS threshold
        self.nms_thresh = nms_thresh
        # Mask threshold
        self.mask_thresh = mask_thresh
        self.max_boxes_num = max_boxes_num
        self.debug_mode = debug_mode
        self.scale = 1.0
        self.colors = get_colors(len(self.labels))
        self.masks = None
        self.sock = None  # Socket connection object
        self.last_heartbeat = time.ticks_ms()
        # self.widths_info = []  # New: store width information
        if self.task_type == "segment":
            if self.mode == "image":
                self.masks = np.zeros((1, self.rgb888p_size[1], self.rgb888p_size[0], 4), dtype=np.uint8)
            elif self.mode == "video":
                self.masks = np.zeros((1, self.display_size[1], self.display_size[0], 4), dtype=np.uint8)
        # Ai2d instance used for model preprocessing
        self.ai2d = Ai2d(self.debug_mode)
        # Set the Ai2d input/output formats and data types
        self.ai2d.set_ai2d_dtype(nn.ai2d_format.NCHW_FMT, nn.ai2d_format.NCHW_FMT, np.uint8, np.uint8)
        # # Initialize the UART serial port
        # self.uart = UART(UART.UART1, baudrate=115200)  # use UART 1 at 115200 baud
    # Configure the preprocessing steps. Pad and resize are used here; Ai2d supports crop/shift/pad/resize/affine, see /sdcard/app/libs/AI2D.py for details.
    def config_preprocess(self, input_image_size=None):
        with ScopedTiming("set preprocess config", self.debug_mode > 0):
            # Initialize the ai2d preprocessing configuration. It defaults to the resolution the sensor feeds to the AI; you can override it via input_image_size.
            ai2d_input_size = input_image_size if input_image_size else self.rgb888p_size
            top, bottom, left, right, scale = letterbox_pad_param(self.rgb888p_size, self.model_input_size)
            self.ai2d.pad([0, 0, 0, 0, top, bottom, left, right], 0, [128, 128, 128])
            self.ai2d.resize(nn.interp_method.tf_bilinear, nn.interp_mode.half_pixel)
            # The build parameters are the input shape and the output shape
            self.ai2d.build([1, 3, ai2d_input_size[1], ai2d_input_size[0]], [1, 3, self.model_input_size[1], self.model_input_size[0]])
    # Custom postprocessing for the current task
    def postprocess(self, results):
        with ScopedTiming("postprocess", self.debug_mode > 0):
            new_result = results[0][0].transpose()
            if self.mode == "image":
                seg_res = aidemo.yolov8_seg_postprocess(new_result.copy(), results[1][0], [self.rgb888p_size[1], self.rgb888p_size[0]], [self.model_input_size[1], self.model_input_size[0]], [self.rgb888p_size[1], self.rgb888p_size[0]], len(self.labels), self.conf_thresh, self.nms_thresh, self.mask_thresh, self.masks)
            elif self.mode == "video":
                seg_res = aidemo.yolov8_seg_postprocess(new_result.copy(), results[1][0], [self.rgb888p_size[1], self.rgb888p_size[0]], [self.model_input_size[1], self.model_input_size[0]], [self.display_size[1], self.display_size[0]], len(self.labels), self.conf_thresh, self.nms_thresh, self.mask_thresh, self.masks)
            return seg_res
    def calculate_crack_width_2(self, det):
        """
        Extract the crack edges from the segmentation mask and compute the crack width.
        """
        # x1, y1, w, h = map(int, det)
        x1, y1, w, h = map(lambda x: int(round(x, 0)), det)
        x1 = max(0, x1)
        y1 = max(0, y1)
        x2 = min(self.display_size[0], x1 + w)
        y2 = min(self.display_size[1], y1 + h)
        # Extract the alpha channel and binarize it
        mask_data = self.masks[0, y1:y2, x1:x2, 3]
        # Crop out the crack region
        # roi = mask_data[y1:y1+h, x1:x1+w]
        # roi = roi * 255
        # roi_uint8 = np.array(roi, dtype=np.uint8)
        roi_uint8 = np.array(np.where(mask_data > 0, 255, 0), dtype=np.uint8)  # make sure the data is uint8 before converting to bytes
        roi_bytes = roi_uint8.tobytes()
        # Convert to an image object (assumed to be single-channel)
        H, W = mask_data.shape[0], mask_data.shape[1]
        print(f"H:{H},W:{W}")
        img_mask = image.Image(W, H, image.GRAYSCALE, alloc=image.ALLOC_REF, data=roi_bytes)
        img_mask.save("/data/mask_debug.jpg")
        # Find the crack region
        thresholds = [(127, 255)]
        blobs = img_mask.find_blobs(thresholds, area_threshold=100, merge=True)
        # edges = img_mask.find_edges(image.EDGE_CANNY)
        # print(blobs)
        if blobs:
            blob = max(blobs, key=lambda b: b.area())
            print("blob.rect():", blob.rect())      # prints (x, y, w, h)
            print("blob.area():", blob.area())      # prints the blob area
            print("blob.pixels():", blob.pixels())
            x_blob, y_blob, w_blob, h_blob = blob.rect()
            width = min(w_blob, h_blob)
            # Optional: pixel-to-millimeter conversion
            pixel_to_mm = 0.007625  # example value; should come from actual calibration
            width = width * pixel_to_mm
            return width
        else:
            print("No crack region detected")
            return 0
    # Draw the results
    def draw_result(self, res, img):
        with ScopedTiming("draw result", self.debug_mode > 0):
            if self.mode == "video":
                if res[0]:
                    img.clear()
                    mask_img = image.Image(self.display_size[0], self.display_size[1], image.ARGB8888, alloc=image.ALLOC_REF, data=self.masks)
                    img.copy_from(mask_img)
                    dets, ids, scores = res[0], res[1], res[2]
                    for i, det in enumerate(dets):
                        x1, y1, w, h = map(lambda x: int(round(x, 0)), det)
                        width = self.calculate_crack_width_2(det)
                        cx = x1 + w // 2
                        cy = y1 + h // 2
                        print("width:", width)
                        display_text = f" {self.labels[int(ids[i])]} Width: {width}mm"
                        img.draw_string_advanced(x1, y1 - 50, 32, display_text, color=self.colors[int(ids[i])])
                else:
                    img.clear()
            elif self.mode == "image":
                if res[0]:
                    mask_rgb = self.masks[0, :, :, 1:4]
                    mask_img = image.Image(self.rgb888p_size[0], self.rgb888p_size[1], image.RGB888, alloc=image.ALLOC_REF, data=mask_rgb.copy())
                    dets, ids, scores = res[0], res[1], res[2]
                    for i, det in enumerate(dets):
                        x, y, w, h = map(lambda x: int(round(x, 0)), det)
                        width = self.calculate_crack_width_1(det)
                        print("width:", width)
                        display_text = f" {self.labels[int(ids[i])]} {round(scores[i], 2)} Width: {width}mm"
                        mask_img.draw_string_advanced(x, y - 50, 32, display_text, color=self.colors[int(ids[i])])
                    mask_img.compress_for_ide()
                else:
                    img.clear()
    # Non-maximum suppression (NMS) for multi-object detection
    def nms(self, boxes, scores, thresh):
        """Pure Python NMS baseline."""
        x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
        areas = (x2 - x1 + 1) * (y2 - y1 + 1)
        order = np.argsort(scores, axis=0)[::-1]
        keep = []
        while order.size > 0:
            i = order[0]
            keep.append(i)
            new_x1, new_y1, new_x2, new_y2, new_areas = [], [], [], [], []
            for order_i in order:
                new_x1.append(x1[order_i])
                new_x2.append(x2[order_i])
                new_y1.append(y1[order_i])
                new_y2.append(y2[order_i])
                new_areas.append(areas[order_i])
            new_x1 = np.array(new_x1)
            new_x2 = np.array(new_x2)
            new_y1 = np.array(new_y1)
            new_y2 = np.array(new_y2)
            xx1 = np.maximum(x1[i], new_x1)
            yy1 = np.maximum(y1[i], new_y1)
            xx2 = np.minimum(x2[i], new_x2)
            yy2 = np.minimum(y2[i], new_y2)
            w = np.maximum(0.0, xx2 - xx1 + 1)
            h = np.maximum(0.0, yy2 - yy1 + 1)
            inter = w * h
            new_areas = np.array(new_areas)
            ovr = inter / (areas[i] + new_areas - inter)
            new_order = []
            for ovr_i, ind in enumerate(ovr):
                if ind < thresh:
                    new_order.append(order[ovr_i])
            order = np.array(new_order, dtype=np.uint8)
        return keep
if __name__ == "__main__":
    # Display mode, "hdmi" by default; "hdmi" and "lcd" are available
    # uart.write('Hello 01Studio!')  # send a test message
    print("Crack detection starting...")
    # Display
    display_mode = "lcd"
    rgb888p_size = [640, 640]
    if display_mode == "hdmi":
        display_size = [1920, 1080]
    else:
        display_size = [800, 480]
    # Model configuration
    kmodel_path = "/data/best.kmodel"
    labels = ["Crack"]
    confidence_threshold = 0.5
    nms_threshold = 0.45
    mask_threshold = 0.5
    model_input_size = [320, 320]
    # Initialize the PipeLine
    pl = PipeLine(rgb888p_size=rgb888p_size, display_size=display_size, display_mode=display_mode)
    pl.create()
    # Initialize the YOLOv8 instance
    yolo = SegmentationApp(task_type="segment", mode="video", kmodel_path=kmodel_path, labels=labels, rgb888p_size=rgb888p_size, model_input_size=model_input_size, display_size=display_size, conf_thresh=confidence_threshold, nms_thresh=nms_threshold, mask_thresh=mask_threshold, max_boxes_num=50, debug_mode=0)
    yolo.config_preprocess()
    check = 0
    try:
        while True:
            os.exitpoint()
            with ScopedTiming("total", 1):
                # Per-frame inference
                check = check + 1
                img = pl.get_frame()
                res = yolo.run(img)
                yolo.draw_result(res, pl.osd_img)
                pl.show_image()
                time.sleep(0.1)  # 100 ms
                gc.collect()
    except Exception as e:
        print("Error occurred:", e)
    finally:
        yolo.deinit()
        pl.destroy()
        print("System shut down")