`nncase` Model Simulator API Manual#

Overview#

In addition to the model compilation API, nncase also provides a model inference (simulator) API. It lets you run the kmodel produced by compilation from Python on a PC, so that you can verify whether the nncase inference results are consistent with the results produced by the runtime of the corresponding deep learning framework. The API described in this document is used to verify the correctness of kmodel conversion on a local PC; it is not the code that runs on the k230. To learn more about nncase, please refer to the nncase GitHub repo.

API introduction#

MemoryRange#

Description

MemoryRange class, used to represent memory range.

definition

// pybind11 binding that exposes the C++ memory_range type to Python under the
// name "MemoryRange".
py::class_<memory_range>(m, "MemoryRange")
    // "location" reads/writes memory_range::memory_location directly.
    .def_readwrite("location", &memory_range::memory_location)
    // "dtype" is a computed property: the getter converts the stored datatype
    // with to_dtype(); the setter builds a py::dtype from the given Python
    // object and stores the result of from_dtype().
    .def_property(
        "dtype", [](const memory_range &range) { return to_dtype(range.datatype); },
        [](memory_range &range, py::object dtype) { range.datatype = from_dtype(py::dtype::from_args(dtype)); })
    // "start" and "size" are plain read/write fields.
    .def_readwrite("start", &memory_range::start)
    .def_readwrite("size", &memory_range::size);

property

name	type	Description
location	int	Memory location, 0 represents input, 1 represents output, 2 represents rdata, 3 represents data, 4 represents shared_data
dtype	python data types	data type
start	int	memory starting address
size	int	Memory size

Example

mr = nncase.MemoryRange()

RuntimeTensor#

Description

RuntimeTensor class, used to represent runtime tensor.

definition

// pybind11 binding that exposes the C++ runtime_tensor type to Python under
// the name "RuntimeTensor".
py::class_<runtime_tensor>(m, "RuntimeTensor")
    // Static factory: builds a host runtime tensor from a numpy array using the
    // array's dtype, shape, strides and data pointer.
    .def_static("from_numpy", [](py::array arr) {
        auto src_buffer = arr.request();
        auto datatype = from_dtype(arr.dtype());
        // The span covers size * itemsize bytes starting at the array's data pointer.
        // NOTE(review): the tensor appears to reference the numpy memory rather
        // than copy it; the last argument looks like a release callback — confirm
        // against host_runtime_tensor::create.
        auto tensor = host_runtime_tensor::create(
            datatype,
            to_rt_shape(src_buffer.shape),
            to_rt_strides(src_buffer.itemsize, src_buffer.strides),
            gsl::make_span(reinterpret_cast<gsl::byte *>(src_buffer.ptr), src_buffer.size * src_buffer.itemsize),
            [=](gsl::byte *) { arr.dec_ref(); })
                          .unwrap_or_throw();
        // Pairs with the dec_ref() in the callback above; presumably keeps the
        // numpy array alive for as long as the tensor uses its memory.
        arr.inc_ref();
        return tensor;
    })
    // Copies this tensor into `to`; the result is passed through unwrap_or_throw().
    .def("copy_to", [](runtime_tensor &from, runtime_tensor &to) {
        from.copy_to(to).unwrap_or_throw();
    })
    // Maps the tensor's host buffer for reading and constructs a numpy array
    // from the tensor's dtype, shape, strides and the mapped data pointer.
    .def("to_numpy", [](runtime_tensor &tensor) {
        auto host = tensor.as_host().unwrap_or_throw();
        auto src_map = std::move(hrt::map(host, hrt::map_read).unwrap_or_throw());
        auto src_buffer = src_map.buffer();
        return py::array(
            to_dtype(tensor.datatype()),
            tensor.shape(),
            to_py_strides(runtime::get_bytes(tensor.datatype()), tensor.strides()),
            src_buffer.data());
    })
    // Read-only property: the tensor's datatype converted with to_dtype().
    .def_property_readonly("dtype", [](runtime_tensor &tensor) {
        return to_dtype(tensor.datatype());
    })
    // Read-only property: the tensor's shape converted with to_py_shape().
    .def_property_readonly("shape", [](runtime_tensor &tensor) {
        return to_py_shape(tensor.shape());
    })

property

name	type	Description
dtype	python data types	Tensor data type
shape	list	tensor shape

from_numpy#

Description

Construct a RuntimeTensor object from numpy.ndarray.

definition

from_numpy(py::array arr)

Parameters

name	type	Description
arr	numpy.ndarray	numpy.ndarray object

Return Value

RuntimeTensor object.

Example

tensor = nncase.RuntimeTensor.from_numpy(self.inputs[i]['data'])

copy_to#

Description

Copy the contents of this RuntimeTensor into another RuntimeTensor.

definition

copy_to(RuntimeTensor to)

Parameters

name	type	Description
to	RuntimeTensor	Destination RuntimeTensor object

Return Value

None.

Example

sim.get_output_tensor(i).copy_to(to)

to_numpy#

Description

Convert RuntimeTensor to numpy.ndarray object.

definition

to_numpy()

Parameters

None.

Return Value

numpy.ndarray object.

Example

arr = sim.get_output_tensor(i).to_numpy()

Simulator#

Description

Simulator class, used to run kmodel inference on a PC.

definition

// pybind11 binding that exposes the C++ interpreter type to Python under the
// name "Simulator".
py::class_<interpreter>(m, "Simulator")
    // Default constructor: nncase.Simulator().
    .def(py::init())
    // Loads a kmodel from a byte buffer; the result is passed through unwrap_or_throw().
    .def("load_model", [](interpreter &interp, gsl::span<const gsl::byte> buffer) { interp.load_model(buffer).unwrap_or_throw(); })
    // Read-only properties bound to interpreter::inputs_size / outputs_size.
    .def_property_readonly("inputs_size", &interpreter::inputs_size)
    .def_property_readonly("outputs_size", &interpreter::outputs_size)
    // Descriptor accessors bound directly to interpreter::input_desc / output_desc.
    .def("get_input_desc", &interpreter::input_desc)
    .def("get_output_desc", &interpreter::output_desc)
    // Getter/setter pairs for the input and output tensors at a given index;
    // each forwards to the matching interpreter overload.
    .def("get_input_tensor", [](interpreter &interp, size_t index) { return interp.input_tensor(index).unwrap_or_throw(); })
    .def("set_input_tensor", [](interpreter &interp, size_t index, runtime_tensor tensor) { return interp.input_tensor(index, tensor).unwrap_or_throw(); })
    .def("get_output_tensor", [](interpreter &interp, size_t index) { return interp.output_tensor(index).unwrap_or_throw(); })
    .def("set_output_tensor", [](interpreter &interp, size_t index, runtime_tensor tensor) { return interp.output_tensor(index, tensor).unwrap_or_throw(); })
    // Runs inference on the loaded model.
    .def("run", [](interpreter &interp) { interp.run().unwrap_or_throw(); })

property

name	type	Description
inputs_size	int	Number of inputs
outputs_size	int	Number of outputs

Example

sim = nncase.Simulator()

load_model#

Description

Load kmodel.

definition

load_model(model_content)

Parameters

name	type	Description
model_content	bytes	kmodel byte stream

Return Value

None.

Example

sim.load_model(kmodel)

get_input_desc#

Description

Gets the description information of the input at the specified index.

definition

get_input_desc(index)

Parameters

name	type	Description
index	int	Input index

Return Value

MemoryRange

Example

input_desc_0 = sim.get_input_desc(0)

get_output_desc#

Description

Gets the description information for the output of the specified index.

definition

get_output_desc(index)

Parameters

name	type	Description
index	int	index of output

Return Value

MemoryRange

Example

output_desc_0 = sim.get_output_desc(0)

get_input_tensor#

Description

Gets the RuntimeTensor of the input at the specified index.

definition

get_input_tensor(index)

Parameters

name	type	Description
index	int	Index of input tensor

Return Value

RuntimeTensor

Example

input_tensor_0 = sim.get_input_tensor(0)

set_input_tensor#

Description

Sets the RuntimeTensor of the input at the specified index.

definition

set_input_tensor(index, tensor)

Parameters

name	type	Description
index	int	Index of input tensor
tensor	RuntimeTensor	input tensor

Return Value

None.

Example

sim.set_input_tensor(0, nncase.RuntimeTensor.from_numpy(self.inputs[0]['data']))

get_output_tensor#

Description

Gets the RuntimeTensor of the output at the specified index.

definition

get_output_tensor(index)

Parameters

name	type	Description
index	int	Index of output tensor

Return Value

RuntimeTensor

Example

output_arr_0 = sim.get_output_tensor(0).to_numpy()

set_output_tensor#

Description

Sets the RuntimeTensor of the output at the specified index.

definition

set_output_tensor(index, tensor)

Parameters

name	type	Description
index	int	Index of output tensor
tensor	RuntimeTensor	output tensor

Return Value

None.

Example

sim.set_output_tensor(0, tensor)

run#

Description

Run kmodel inference.

definition

run()

Parameters

None.

Return Value

None.

Example

sim.run()

Example#

Precondition: the yolov5s.onnx model has already been compiled into a kmodel with the yolov5s_onnx.py script.

yolov5s_onnx_simu.py is located in the src/rtsmart/libs/nncase/examples/scripts subdirectory, and the content is as follows:

import os
import copy
import argparse
import numpy as np
import onnx
import onnxruntime as ort
import nncase

def read_model_file(model_file):
    """Return the entire contents of ``model_file`` as bytes.

    The file is opened in binary mode and is closed before returning,
    even if reading fails.
    """
    handle = open(model_file, 'rb')
    try:
        return handle.read()
    finally:
        handle.close()

def cosine(gt, pred):
    """Return the cosine similarity between two 1-D vectors.

    Args:
        gt: reference vector (1-D array-like).
        pred: vector to compare against ``gt`` (1-D array-like, same length).

    Returns:
        ``dot(gt, pred) / (||gt||_2 * ||pred||_2)``. If either vector has
        zero norm the result is NaN, as produced by the division.
    """
    gt = np.asarray(gt)
    pred = np.asarray(pred)
    # ``@`` keeps the operand dtype, so integer tensors (e.g. uint8 outputs)
    # would wrap around in the dot product while the norms are computed in
    # floating point. Promote non-floating inputs so both sides agree.
    if not np.issubdtype(gt.dtype, np.inexact):
        gt = gt.astype(np.float64)
    if not np.issubdtype(pred.dtype, np.inexact):
        pred = pred.astype(np.float64)
    return (gt @ pred) / (np.linalg.norm(gt, 2) * np.linalg.norm(pred, 2))

def main():
    """Compare ONNX Runtime results with nncase simulator results for one model.

    Command-line options (all required, since each is used unconditionally):
        --model         original ONNX model file
        --model_input   raw float32 input file for the original model
        --kmodel        compiled kmodel file
        --kmodel_input  raw input file for the kmodel

    Runs the ONNX model on the CPU and the kmodel in the nncase simulator,
    then prints the cosine similarity of each pair of flattened outputs.
    """
    parser = argparse.ArgumentParser(prog="nncase")
    # Marked required so a missing option fails with a usage message instead of
    # passing None into onnxruntime / numpy further down.
    parser.add_argument("--model", type=str, required=True, help='original model file')
    parser.add_argument("--model_input", type=str, required=True, help='input bin file for original model')
    parser.add_argument("--kmodel", type=str, required=True, help='kmodel file')
    parser.add_argument("--kmodel_input", type=str, required=True, help='input bin file for kmodel')
    args = parser.parse_args()

    # cpu inference
    ort_session = ort.InferenceSession(args.model)
    output_names = [output.name for output in ort_session.get_outputs()]
    model_input = ort_session.get_inputs()[0]
    # The reference input is read as float32 and shaped like the model's first input.
    model_input_data = np.fromfile(args.model_input, np.float32).reshape(model_input.shape)
    cpu_results = ort_session.run(output_names, {model_input.name: model_input_data})

    # create simulator
    sim = nncase.Simulator()

    # read and load kmodel
    sim.load_model(read_model_file(args.kmodel))

    # read input.bin using the dtype reported for kmodel input 0;
    # the shape is hard-coded for this 320x320 yolov5s example
    dtype = sim.get_input_desc(0).dtype
    kmodel_input = np.fromfile(args.kmodel_input, dtype).reshape([1, 3, 320, 320])

    # set input for simulator
    sim.set_input_tensor(0, nncase.RuntimeTensor.from_numpy(kmodel_input))

    # simulator inference
    sim.run()
    # Deep-copy each output so the results do not reference simulator-owned buffers.
    nncase_results = [
        copy.deepcopy(sim.get_output_tensor(i).to_numpy())
        for i in range(sim.outputs_size)
    ]

    # compare
    for i, nncase_result in enumerate(nncase_results):
        cos = cosine(np.reshape(nncase_result, (-1)), np.reshape(cpu_results[i], (-1)))
        print('output {0} cosine similarity : {1}'.format(i, cos))

# Run the comparison only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()

Execute inference script

root@5f718e19f8a7:/mnt/# cd rtos_sdk/src/rtsmart/libs/nncase/examples
root@5f718e19f8a7:/mnt/rtos_sdk/src/rtsmart/libs/nncase/examples # export PATH=$PATH:/usr/local/lib/python3.8/dist-packages/
root@5f718e19f8a7:/mnt/rtos_sdk/src/rtsmart/libs/nncase/examples # python3 scripts/yolov5s_onnx_simu.py --model models/yolov5s.onnx --model_input object_detect/data/input_fp32.bin --kmodel tmp/yolov5s_onnx/test.kmodel --kmodel_input object_detect/data/input_uint8.bin

The comparison between nncase simulator and CPU inference results is as follows

output 0 cosine similarity : 0.9997244477272034
output 1 cosine similarity : 0.999757707118988
output 2 cosine similarity : 0.9997308850288391

nncase Model Simulator API Manual

Contents

`nncase` Model Simulator API Manual#

Overview#

API introduction#

MemoryRange#

RuntimeTensor#

from_numpy#

copy_to#

to_numpy#

Simulator#

load_model#

get_input_desc#

get_output_desc#

get_input_tensor#

set_input_tensor#

get_output_tensor#

set_output_tensor#

run#

Example#

nncase Model Simulator API Manual

Contents

nncase Model Simulator API Manual#

Overview#

API introduction#

MemoryRange#

RuntimeTensor#

from_numpy#

copy_to#

to_numpy#

Simulator#

load_model#

get_input_desc#

get_output_desc#

get_input_tensor#

set_input_tensor#

get_output_tensor#

set_output_tensor#

run#

Example#

`nncase` Model Simulator API Manual#