yolov5在pc上正常，一上开发板就跑飞

Question

问题描述

如标题，我的 yolov5 模型是在官方模型的基础上用工具剪枝得到的。原模型、剪枝后的模型，以及转换成 onnx 和 kmodel 之后的模型，在 PC 端全都能正常使用，但一到开发板端就不行了。我没找到明显的问题（也有可能是 resize 的问题，但我确实不知道怎么改），可能是我对官方工具的理解有些出入，请大佬帮忙看看。

硬件板卡

k230_canmv_v3

软件版本

rtos+linux

复现步骤

代码放在正文里发不出来，所以贴在这里：
void Model::preprocess(uintptr_t vaddr, uintptr_t paddr)
{
// input tensor
dims_t in_shape{1,720,1280,3};
auto in_tensor = host_runtime_tensor::create(dt_uint8, in_shape, { (gsl::byte *)vaddr, compute_size(in_shape) },false,hrt::pool_shared,paddr).expect("cannot create input tensor");
hrt::sync(in_tensor, sync_op_t::sync_write_back, true).expect("write back input failed");
// output tensor
dims_t out_shape=interp_.input_shape(0);
printf("kmodel:%dx%dx%dx%d ",kmodel_shape[0],kmodel_shape[1],kmodel_shape[2],kmodel_shape[3]);
printf("shape:%dx%dx%dx%d ",out_shape[0],out_shape[1],out_shape[2],out_shape[3]);
// config ai2d
ai2d_datatype_t ai2d_dtype { ai2d_format::RGB_packed, ai2d_format::NCHW_FMT, dt_uint8,dt_float32};//在头文件里找到相似定义（simple_types）
ai2d_crop_param_t crop_param { false, 0, 0, 720,1280};//横向裁减2/3,false了，美使用
ai2d_shift_param_t shift_param { false, 0 };
ai2d_pad_param_t pad_param { true, { { 0, 0 }, { 0, 0 }, { 70, 70 }, { 0, 0 } }, ai2d_pad_mode::constant, { 114,114,114 } };//pad模式常数
ai2d_resize_param_t resize_param { true, ai2d_interp_method::tf_bilinear, ai2d_interp_mode::half_pixel};
ai2d_affine_param_t affine_param { false};

// run
ai2d_builder builder { in_shape, out_shape, ai2d_dtype, crop_param, shift_param, pad_param, resize_param, affine_param };
auto start = std::chrono::steady_clock::now();
builder.build_schedule().expect("error occurred in ai2d build_schedule");
builder.invoke(in_tensor, ai2d_out_tensor_).expect("error occurred in ai2d invoke");
auto stop = std::chrono::steady_clock::now();
double duration = std::chrono::duration(stop - start).count();
std::cout << "ai2d run: duration = " << duration << " ms, fps = " << 1000 / duration << std::endl;

return;

}
int Model::run(uintptr_t vaddr, uintptr_t paddr)
{
preprocess(vaddr, paddr);
kpu_run();
return postprocess();
}
// ==================== NMS后处理 ====================

/**
 * @brief Dumps the raw output, filters boxes by objectness, runs greedy NMS
 *        and draws the surviving boxes with kd_mpi_vo_draw_frame().
 *
 * The buffer is read as 6300 rows of 7 floats: [cx, cy, w, h, obj, cls0, cls1].
 * Only column 4 (objectness) is used as the score; the class columns are not
 * consulted. NOTE(review): the PC reference script in this post scores boxes
 * with obj * max(cls) and a 0.15 threshold, so results can differ from it.
 *
 * Box coordinates are mapped from the 320x320 letterboxed model input back to
 * the 1280x720 source frame by removing the letterbox padding and dividing by
 * the letterbox scale, then clamped to the frame.
 *
 * @param model_output   Flat model output; must hold at least 6300*7 floats.
 * @param conf_threshold Boxes whose objectness is below this are discarded.
 * @param iou_threshold  A box whose IoU with an already kept box exceeds this
 *                       is suppressed.
 * @return Number of boxes kept after NMS; 0 if none pass the threshold or if
 *         model_output is null.
 */
int Model::nms_filter(const float* model_output, float conf_threshold, float iou_threshold)
{
    constexpr int kNumBoxes = 6300;   // fixed number of candidate boxes
    constexpr int kRowLen = 7;        // floats per box
    constexpr int kMaxFrames = 16;    // draw slots cleared/used below

    // Letterbox geometry: 1280x720 source -> 320x320 model input.
    constexpr float kModelSize = 320.0f;
    constexpr float kSrcWidth = 1280.0f;
    constexpr float kSrcHeight = 720.0f;
    constexpr float kScaleX = kModelSize / kSrcWidth;
    constexpr float kScaleY = kModelSize / kSrcHeight;
    constexpr float kScale = (kScaleX < kScaleY) ? kScaleX : kScaleY;        // 0.25
    constexpr float kPadX = (kModelSize - kSrcWidth * kScale) * 0.5f;        // 0
    constexpr float kPadY = (kModelSize - kSrcHeight * kScale) * 0.5f;       // 70

    static int call_index = 0;
    const int this_call = call_index++;

    if (model_output == nullptr)
    {
        printf("nms_filter: model_output is null\n");
        return 0;
    }

    // Debug dump of the raw output, appended to out.txt on every call.
    FILE *fp = fopen("out.txt", "a");
    if (fp != nullptr)
    {
        fprintf(fp, "第%d次 ", this_call);
        for (int i = 0; i < kNumBoxes; i++)
        {
            for (int j = 0; j < kRowLen; j++)
                fprintf(fp, "%.6f ", model_output[i * kRowLen + j]);
            fprintf(fp, " ");
        }
        fclose(fp);
    }
    else
    {
        // Previously a failed fopen led straight to fprintf(NULL, ...).
        printf("nms_filter: cannot open out.txt, dump skipped\n");
    }

    // Disable every draw slot before drawing the new set of boxes.
    k_vo_draw_frame frame{false,0,0,0,0,0};
    for (int slot = 0; slot < kMaxFrames; slot++)
    {
        frame.draw_en = false;
        frame.frame_num = slot;
        const int ret = kd_mpi_vo_draw_frame(&frame);
        if (ret)
        {
            printf("clear triangle error!!! ");
        }
    }

    // ====================== 1. Confidence filter ======================
    int valid_boxes[kNumBoxes];   // row indices that passed the threshold
    int num_valid = 0;
    for (int i = 0; i < kNumBoxes; i++)
    {
        const float obj = model_output[i * kRowLen + 4];
        // The score is floored at 0, matching the original comparison.
        const float score = (obj > 0.0f) ? obj : 0.0f;
        if (score < conf_threshold)
            continue;
        valid_boxes[num_valid++] = i;
    }
    printf("num_valid:%d\n", num_valid);
    if (num_valid <= 0)
        return 0;

    // ====================== 2. Sort by objectness, descending ======================
    for (int i = 0; i < num_valid - 1; i++)
    {
        for (int j = i + 1; j < num_valid; j++)
        {
            const float score_i = model_output[valid_boxes[i] * kRowLen + 4];
            const float score_j = model_output[valid_boxes[j] * kRowLen + 4];
            if (score_i < score_j)
            {
                // Plain int swap; the indices are ints, not floats.
                const int tmp = valid_boxes[i];
                valid_boxes[i] = valid_boxes[j];
                valid_boxes[j] = tmp;
            }
        }
    }

    // ====================== 3. Greedy NMS ======================
    // A suppressed entry is marked by overwriting its index with -1.
    for (int i = 0; i < num_valid; i++)
    {
        const int a_idx = valid_boxes[i];
        if (a_idx < 0)
            continue;

        const float *a = model_output + a_idx * kRowLen;
        const float a_x1 = a[0] - a[2] * 0.5f;
        const float a_y1 = a[1] - a[3] * 0.5f;
        const float a_x2 = a[0] + a[2] * 0.5f;
        const float a_y2 = a[1] + a[3] * 0.5f;
        const float a_area = a[2] * a[3];

        for (int j = i + 1; j < num_valid; j++)
        {
            const int b_idx = valid_boxes[j];
            if (b_idx < 0)
                continue;

            const float *b = model_output + b_idx * kRowLen;
            const float b_x1 = b[0] - b[2] * 0.5f;
            const float b_y1 = b[1] - b[3] * 0.5f;
            const float b_x2 = b[0] + b[2] * 0.5f;
            const float b_y2 = b[1] + b[3] * 0.5f;

            const float inter_x1 = (a_x1 > b_x1) ? a_x1 : b_x1;
            const float inter_y1 = (a_y1 > b_y1) ? a_y1 : b_y1;
            const float inter_x2 = (a_x2 < b_x2) ? a_x2 : b_x2;
            const float inter_y2 = (a_y2 < b_y2) ? a_y2 : b_y2;

            float iou = 0.0f;
            if (inter_x2 > inter_x1 && inter_y2 > inter_y1)
            {
                const float inter_area = (inter_x2 - inter_x1) * (inter_y2 - inter_y1);
                const float union_area = a_area + b[2] * b[3] - inter_area;
                if (union_area > 0.0f)
                    iou = inter_area / union_area;
            }

            if (iou > iou_threshold)
                valid_boxes[j] = -1;
        }
    }

    // ====================== 4. Draw and report the kept boxes ======================
    printf("\n===== NMS 最终保留框 =====\n");

    const auto clamp = [](float v, float lo, float hi) {
        return (v < lo) ? lo : ((v > hi) ? hi : v);
    };

    int final_count = 0;
    int drawn = 0;
    for (int i = 0; i < num_valid; i++)
    {
        const int idx = valid_boxes[i];
        if (idx < 0)
            continue;

        const float *row = model_output + idx * kRowLen;
        // Undo the letterbox: subtract the padding, then divide by the scale.
        // Clamping also keeps negative values away from the integer fields.
        const float x1 = clamp((row[0] - row[2] * 0.5f - kPadX) / kScale, 0.0f, kSrcWidth - 1.0f);
        const float y1 = clamp((row[1] - row[3] * 0.5f - kPadY) / kScale, 0.0f, kSrcHeight - 1.0f);
        const float x2 = clamp((row[0] + row[2] * 0.5f - kPadX) / kScale, 0.0f, kSrcWidth - 1.0f);
        const float y2 = clamp((row[1] + row[3] * 0.5f - kPadY) / kScale, 0.0f, kSrcHeight - 1.0f);

        frame.line_x_start = static_cast<decltype(frame.line_x_start)>(x1);
        frame.line_y_start = static_cast<decltype(frame.line_y_start)>(y1);
        frame.line_x_end = static_cast<decltype(frame.line_x_end)>(x2);
        frame.line_y_end = static_cast<decltype(frame.line_y_end)>(y2);

        // Only kMaxFrames slots are cleared above, so never draw beyond them.
        if (drawn < kMaxFrames)
        {
            frame.draw_en = true;
            frame.frame_num = drawn++;
            const int ret = kd_mpi_vo_draw_frame(&frame);
            if (ret)
                printf("draw frame %d error: %d\n", static_cast<int>(frame.frame_num), ret);
        }

        final_count++;
        printf("x:%d,y:%d,w:%d,h:%d\n", static_cast<int>(frame.line_x_start), static_cast<int>(frame.line_y_start), static_cast<int>(frame.line_x_end), static_cast<int>(frame.line_y_end));
    }
    printf("numg valid:%d\n", num_valid);

    return final_count;
}
int Model::postprocess()
{

float *out;
auto tensor = output_tensor(0);
    auto buf = tensor.impl()->to_host().unwrap()->buffer().as_host().unwrap().map(map_access_::map_read).unwrap().buffer();
    out = reinterpret_cast(buf.data());
auto input_shape = interp_.input_shape(0);
printf("in_shape:%dx%dx%d
",input_shape[0],input_shape[1],input_shape[2]);
auto output_shape = interp_.output_shape(0);
printf("out_shape:%dx%dx%d
",output_shape[0],output_shape[1],output_shape[2]);

int final_boxes = nms_filter(
out, // 输入
0.5f, // 置信度阈值
0.45f // IOU 阈值
);
printf("final_boxes:%d ",final_boxes);

return final_boxes;

}

硬件板卡

k230_canmv_v3

软件版本

rtos+linux

8883- · Answer

这是模拟代码
import os
import copy
import argparse
import numpy as np
import onnx
import onnxruntime as ort
import nncase
import sys
import subprocess
import cv2

# Locate the installed nncase package via `pip show` and append its install
# directory to PATH.
result = subprocess.run(["pip", "show", "nncase"], capture_output=True)
# `pip show` output is line based; Windows uses CRLF line endings.
split_flag = "\n"
if sys.platform == "win32":
    split_flag = "\r\n"

location_s = [i for i in result.stdout.decode().split(split_flag) if i.startswith("Location:")]
if not location_s:
    raise RuntimeError("cannot find the nncase install location: `pip show nncase` printed no 'Location:' line")
location = location_s[0].split(": ")[1]

if "PATH" in os.environ:
    os.environ["PATH"] += os.pathsep + location
else:
    os.environ["PATH"] = location

def read_model_file(model_file):
    """Return the complete contents of ``model_file`` as bytes."""
    with open(model_file, 'rb') as f:
        model_content = f.read()
    return model_content

def cosine(gt, pred):
    """Return the cosine similarity of two 1-D vectors.

    Returns 0.0 when either vector has zero L2 norm, instead of dividing by
    zero and producing NaN.
    """
    denom = np.linalg.norm(gt, 2) * np.linalg.norm(pred, 2)
    if denom == 0:
        return 0.0
    return (gt @ pred) / denom

# ==================== YOLOv5 前处理 ====================

def letterbox(img, new_shape=(320, 320), color=(114, 114, 114)):
    """Resize ``img`` keeping its aspect ratio, then pad it to ``new_shape``.

    Args:
        img: Image array whose first two axes are (height, width).
        new_shape: Target (height, width).
        color: Constant border colour used for the padding.

    Returns:
        ``(padded_img, r, (dw, dh))`` where ``r`` is the resize ratio and
        ``dw`` / ``dh`` are the per-side horizontal / vertical padding
        (possibly fractional).
    """
    shape = img.shape[:2]  # (height, width)
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))  # (width, height)
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]
    dw, dh = dw / 2, dh / 2
    if shape[::-1] != new_unpad:
        img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
    # The -0.1 / +0.1 offsets split an odd padding total between the two sides.
    top = int(round(dh - 0.1))
    bottom = int(round(dh + 0.1))
    left = int(round(dw - 0.1))
    right = int(round(dw + 0.1))
    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
    return img, r, (dw, dh)

def preprocess_image(image_path):
    """Load an image and turn it into a float32 NCHW blob scaled to [0, 1].

    Args:
        image_path: Path of the image to read with ``cv2.imread``.

    Returns:
        ``(blob, scale, pad_w, pad_h, ori_img)``: the 1x3xHxW float32 blob, the
        letterbox ratio and padding, and an untouched copy of the original
        image for drawing.

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot load ``image_path``.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise FileNotFoundError(f"cannot read image: {image_path}")
    # Keep the original image for drawing the results later.
    ori_img = img.copy()
    img, scale, (pad_w, pad_h) = letterbox(img)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = img.astype(np.float32) / 255.0
    blob = img.transpose(2, 0, 1)  # HWC -> CHW
    blob = blob[None, ...]         # add the batch axis
    return blob, scale, pad_w, pad_h, ori_img

# ==================== YOLOv5 后处理 ====================

def postprocess(output, scale, pad_w, pad_h, conf_thres=0.15, iou_thres=0.45):
    """Decode YOLOv5 predictions, map them to the original image and run NMS.

    Args:
        output: Array whose first element is an (N, 7) prediction matrix with
            columns [cx, cy, w, h, obj, cls0, cls1].
        scale: Letterbox resize ratio.
        pad_w: Letterbox padding on the x axis.
        pad_h: Letterbox padding on the y axis.
        conf_thres: Minimum ``obj * max(cls0, cls1)`` for a box to be kept.
        iou_thres: A box whose IoU with an already kept box exceeds this is
            dropped.

    Returns:
        List of ``[x1, y1, x2, y2, conf, cls]`` in original-image pixels,
        sorted by descending confidence.
    """
    predictions = output[0]
    obj_conf = predictions[:, 4]
    cls_max = np.maximum(predictions[:, 5], predictions[:, 6])
    final_conf = obj_conf * cls_max
    mask = final_conf >= conf_thres
    filtered = predictions[mask]
    confs = final_conf[mask]

    dets = []
    for p, c in zip(filtered, confs):
        x, y, w, h = p[0:4]
        # Map the coordinates back to the original image.
        x1 = int((x - w / 2 - pad_w) / scale)
        y1 = int((y - h / 2 - pad_h) / scale)
        x2 = int((x + w / 2 - pad_w) / scale)
        y2 = int((y + h / 2 - pad_h) / scale)
        cls = 0 if p[5] > p[6] else 1
        dets.append([x1, y1, x2, y2, c, cls])

    # Greedy NMS, highest confidence first.
    dets = sorted(dets, key=lambda x: x[4], reverse=True)
    keep = []
    for d in dets:
        keep_flag = True
        for k in keep:
            ix1 = max(d[0], k[0])
            iy1 = max(d[1], k[1])
            ix2 = min(d[2], k[2])
            iy2 = min(d[3], k[3])
            area = max(0, ix2 - ix1) * max(0, iy2 - iy1)
            # The 1e-8 term avoids division by zero for degenerate boxes.
            iou = area / ((d[2] - d[0]) * (d[3] - d[1]) + (k[2] - k[0]) * (k[3] - k[1]) - area + 1e-8)
            if iou > iou_thres:
                keep_flag = False
                break
        if keep_flag:
            keep.append(d)
    return keep

# ==================== 画图函数 ====================

def draw_and_save(img, dets, save_path="result.jpg"):
    """Draw every detection on ``img`` (in place) and write it to ``save_path``.

    Args:
        img: Image array that is drawn on directly.
        dets: Iterable of ``(x1, y1, x2, y2, conf, cls)``.
        save_path: Output file path.
    """
    for x1, y1, x2, y2, conf, cls in dets:
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(img, f"{conf:.2f}", (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
    cv2.imwrite(save_path, img)

# ==================== main ====================

def main():
    """Run one image through the ONNX model and the kmodel simulator and compare.

    Writes ``input.bin``, ``out1.txt`` (ONNX outputs), ``out2.txt`` (kmodel
    outputs), ``result_onnx.jpg`` and ``result_kmodel.jpg`` in the current
    directory, and prints the per-output cosine similarity.
    """
    parser = argparse.ArgumentParser(prog="nncase")
    parser.add_argument("--model", type=str, required=True, help='original model file')
    parser.add_argument("--kmodel", type=str, required=True, help='kmodel file')
    parser.add_argument("--image", type=str, required=True, help='test image path')
    args = parser.parse_args()

    # Pre-processing (letterbox + normalise).
    blob, scale, pad_w, pad_h, ori_img = preprocess_image(args.image)
    blob.tofile("input.bin")

    # ONNX inference.
    ort_session = ort.InferenceSession(args.model)
    output_names = [node.name for node in ort_session.get_outputs()]
    model_input = ort_session.get_inputs()[0]
    model_input_name = model_input.name
    model_input_shape = (1, 3, 320, 320)

    model_input_data = np.fromfile("input.bin", dtype=np.float32).reshape(model_input_shape)
    cpu_results = ort_session.run(output_names, {model_input_name: model_input_data})

    # kmodel inference on the simulator, fed with the same input.bin.
    sim = nncase.Simulator()
    kmodel = read_model_file(args.kmodel)
    sim.load_model(kmodel)

    kmodel_input = np.fromfile("input.bin", dtype=np.float32).reshape([1, 3, 320, 320])
    sim.set_input_tensor(0, nncase.RuntimeTensor.from_numpy(kmodel_input))

    sim.run()
    nncase_results = []
    for i in range(sim.outputs_size):
        nncase_result = sim.get_output_tensor(i).to_numpy()
        nncase_results.append(copy.deepcopy(nncase_result))

    # Save the raw outputs.
    with open("out1.txt", "w") as f:
        for idx, res in enumerate(cpu_results):
            f.write(f"========== ONNX Output {idx} ==========\n")
            f.write(f"Shape: {res.shape}\n")
            f.write(f"Data:\n{res}\n\n")
    print(" ONNX 输出已保存 → out1.txt")

    with open("out2.txt", "w") as f:
        for idx, res in enumerate(nncase_results):
            f.write(f"========== KMODEL Output {idx} ==========\n")
            f.write(f"Shape: {res.shape}\n")
            f.write(f"Data:\n{res}\n\n")
    print(" Kmodel 输出已保存 → out2.txt")

    # Cosine similarity between matching outputs.
    print("\n========== 余弦相似度对比 ==========")
    for i in range(len(cpu_results)):
        cos = cosine(cpu_results[i].flatten(), nncase_results[i].flatten())
        print(f'Output {i} cosine: {cos:.6f}')

    # Post-processing.
    print("\n========== ONNX 后处理结果 ==========")
    boxes_onnx = postprocess(cpu_results[0], scale, pad_w, pad_h)
    print(f"检测目标：{len(boxes_onnx)}")

    print("\n========== Kmodel 后处理结果 ==========")
    boxes_kmodel = postprocess(nncase_results[0], scale, pad_w, pad_h)
    print(f"检测目标：{len(boxes_kmodel)}")

    # Draw each result on its own copy: draw_and_save() draws in place, so
    # reusing ori_img would leak the ONNX boxes into the kmodel image.
    draw_and_save(ori_img.copy(), boxes_onnx, "result_onnx.jpg")
    draw_and_save(ori_img.copy(), boxes_kmodel, "result_kmodel.jpg")
    print("\n  检测完成！结果图已保存：")
    print("   - result_onnx.jpg")
    print("   - result_kmodel.jpg")

# Run main() only when the file is executed as a script.
if __name__ == '__main__':
    main()
#as

yolov5在pc上正常，一上开发板就跑飞

问题描述

硬件板卡

软件版本

复现步骤

硬件板卡

软件版本

1 Answers

==================== YOLOv5 前处理 ====================

==================== YOLOv5 后处理 ====================

==================== 画图函数 ====================

==================== main ====================