nncase Model Simulator API Manual#
Overview#
In addition to the model compilation API, nncase also provides a model inference (simulator) API. It lets you use Python on a PC to run inference on a kmodel produced by the compiler, so that you can verify that the nncase inference results are consistent with the results produced by the runtime of the corresponding deep learning framework. The APIs described in this document are intended for verifying the correctness of kmodel conversion on a local PC; they are not the code that runs on the K230. To learn more about nncase, please refer to the nncase GitHub repo.
API introduction#
MemoryRange#
Description
MemoryRange class, used to represent memory range.
definition
// pybind11 binding that exposes memory_range to Python under the name "MemoryRange".
py::class_<memory_range>(m, "MemoryRange")
// "location", "start" and "size" are plain read/write attributes backed by the struct members.
.def_readwrite("location", &memory_range::memory_location)
// "dtype" is a read/write property: the getter converts the stored datatype with to_dtype(),
// the setter converts the assigned Python object with py::dtype::from_args() and from_dtype().
.def_property(
"dtype", [](const memory_range &range) { return to_dtype(range.datatype); },
[](memory_range &range, py::object dtype) { range.datatype = from_dtype(py::dtype::from_args(dtype)); })
.def_readwrite("start", &memory_range::start)
.def_readwrite("size", &memory_range::size);
property
name |
type |
Description |
|---|---|---|
location |
int |
Memory location, 0 represents input, 1 represents output, 2 represents rdata, 3 represents data, 4 represents shared_data |
dtype |
python data types |
data type |
start |
int |
memory starting address |
size |
int |
Memory size |
Example
mr = nncase.MemoryRange()
RuntimeTensor#
Description
RuntimeTensor class, used to represent runtime tensor.
definition
// pybind11 binding that exposes runtime_tensor to Python under the name "RuntimeTensor".
// NOTE(review): this listing ends without a terminating ';' — it appears to be a truncated
// excerpt; confirm against the nncase source before copying it.
py::class_<runtime_tensor>(m, "RuntimeTensor")
// from_numpy: static factory. The element type comes from arr.dtype(); shape, strides and the
// data pointer come from the buffer info returned by arr.request().
.def_static("from_numpy", [](py::array arr) {
auto src_buffer = arr.request();
auto datatype = from_dtype(arr.dtype());
auto tensor = host_runtime_tensor::create(
datatype,
to_rt_shape(src_buffer.shape),
to_rt_strides(src_buffer.itemsize, src_buffer.strides),
// Span over the array's buffer: element count times item size, in bytes.
gsl::make_span(reinterpret_cast<gsl::byte *>(src_buffer.ptr), src_buffer.size * src_buffer.itemsize),
// Deleter callback: releases the reference taken by arr.inc_ref() below.
[=](gsl::byte *) { arr.dec_ref(); })
.unwrap_or_throw();
// Extra reference on the array; presumably this keeps the numpy buffer alive while the
// tensor refers to it — TODO confirm against host_runtime_tensor::create.
arr.inc_ref();
return tensor;
})
// copy_to: forwards to runtime_tensor::copy_to and throws if the result is an error.
.def("copy_to", [](runtime_tensor &from, runtime_tensor &to) {
from.copy_to(to).unwrap_or_throw();
})
// to_numpy: views the tensor as a host tensor, maps it for reading, and builds a py::array
// from the mapped buffer using the tensor's dtype, shape and strides.
.def("to_numpy", [](runtime_tensor &tensor) {
auto host = tensor.as_host().unwrap_or_throw();
auto src_map = std::move(hrt::map(host, hrt::map_read).unwrap_or_throw());
auto src_buffer = src_map.buffer();
return py::array(
to_dtype(tensor.datatype()),
tensor.shape(),
to_py_strides(runtime::get_bytes(tensor.datatype()), tensor.strides()),
src_buffer.data());
})
// Read-only "dtype" property: the tensor's datatype converted with to_dtype().
.def_property_readonly("dtype", [](runtime_tensor &tensor) {
return to_dtype(tensor.datatype());
})
// Read-only "shape" property: the tensor's shape converted with to_py_shape().
.def_property_readonly("shape", [](runtime_tensor &tensor) {
return to_py_shape(tensor.shape());
})
property
name |
type |
Description |
|---|---|---|
dtype |
python data types |
Tensor data type |
shape |
list |
tensor shape |
from_numpy#
Description
Construct a RuntimeTensor object from numpy.ndarray.
definition
from_numpy(py::array arr)
Parameters
name |
type |
Description |
|---|---|---|
arr |
numpy.ndarray |
numpy.ndarray object |
Return Value
RuntimeTensor object.
Example
tensor = nncase.RuntimeTensor.from_numpy(self.inputs[i]['data'])
copy_to#
Description
Copy RuntimeTensor.
definition
copy_to(RuntimeTensor to)
Parameters
name |
type |
Description |
|---|---|---|
to |
RuntimeTensor |
RuntimeTensor object |
Return Value
None.
Example
sim.get_output_tensor(i).copy_to(to)
to_numpy#
Description
Convert RuntimeTensor to numpy.ndarray object.
definition
to_numpy()
Parameters
None.
Return Value
numpy.ndarray object.
Example
arr = sim.get_output_tensor(i).to_numpy()
Simulator#
Description
Simulator class, used for inferring kmodel on PC.
definition
// pybind11 binding that exposes the runtime interpreter to Python under the name "Simulator".
// NOTE(review): this listing ends without a terminating ';' — it appears to be a truncated
// excerpt; confirm against the nncase source before copying it.
py::class_<interpreter>(m, "Simulator")
// Default constructor: nncase.Simulator() takes no arguments.
.def(py::init())
// load_model: passes the byte buffer to interpreter::load_model and throws on error.
.def("load_model", [](interpreter &interp, gsl::span<const gsl::byte> buffer) { interp.load_model(buffer).unwrap_or_throw(); })
// Read-only properties bound directly to the interpreter's inputs_size / outputs_size members.
.def_property_readonly("inputs_size", &interpreter::inputs_size)
.def_property_readonly("outputs_size", &interpreter::outputs_size)
// Descriptor accessors bound directly to interpreter::input_desc / interpreter::output_desc.
.def("get_input_desc", &interpreter::input_desc)
.def("get_output_desc", &interpreter::output_desc)
// Tensor getters/setters: the one-argument overloads fetch the tensor at `index`, the
// two-argument overloads assign it; every call throws if the result is an error.
.def("get_input_tensor", [](interpreter &interp, size_t index) { return interp.input_tensor(index).unwrap_or_throw(); })
.def("set_input_tensor", [](interpreter &interp, size_t index, runtime_tensor tensor) { return interp.input_tensor(index, tensor).unwrap_or_throw(); })
.def("get_output_tensor", [](interpreter &interp, size_t index) { return interp.output_tensor(index).unwrap_or_throw(); })
.def("set_output_tensor", [](interpreter &interp, size_t index, runtime_tensor tensor) { return interp.output_tensor(index, tensor).unwrap_or_throw(); })
// run: executes interpreter::run and throws on error.
.def("run", [](interpreter &interp) { interp.run().unwrap_or_throw(); })
property
name |
type |
Description |
|---|---|---|
inputs_size |
int |
Number of inputs |
outputs_size |
int |
Number of outputs |
Example
sim = nncase.Simulator()
load_model#
Description
Load kmodel.
definition
load_model(model_content)
Parameters
name |
type |
Description |
|---|---|---|
model_content |
byte[] |
kmodel byte stream |
Return Value
None.
Example
sim.load_model(kmodel)
get_input_desc#
Description
Gets the description information of the input at the specified index.
definition
get_input_desc(index)
Parameters
name |
type |
Description |
|---|---|---|
index |
int |
Input index |
Return Value
MemoryRange
Example
input_desc_0 = sim.get_input_desc(0)
get_output_desc#
Description
Gets the description information of the output at the specified index.
definition
get_output_desc(index)
Parameters
name |
type |
Description |
|---|---|---|
index |
int |
index of output |
Return Value
MemoryRange
Example
output_desc_0 = sim.get_output_desc(0)
get_input_tensor#
Description
Gets the RuntimeTensor of the input at the specified index.
definition
get_input_tensor(index)
Parameters
name |
type |
Description |
|---|---|---|
index |
int |
Index of input tensor |
Return Value
RuntimeTensor
Example
input_tensor_0 = sim.get_input_tensor(0)
set_input_tensor#
Description
Sets the RuntimeTensor of the input at the specified index.
definition
set_input_tensor(index, tensor)
Parameters
name |
type |
Description |
|---|---|---|
index |
int |
Index of input tensor |
tensor |
RuntimeTensor |
input tensor |
Return Value
None.
Example
sim.set_input_tensor(0, nncase.RuntimeTensor.from_numpy(self.inputs[0]['data']))
get_output_tensor#
Description
Gets the RuntimeTensor of the output at the specified index.
definition
get_output_tensor(index)
Parameters
name |
type |
Description |
|---|---|---|
index |
int |
Index of output tensor |
Return Value
RuntimeTensor
Example
output_arr_0 = sim.get_output_tensor(0).to_numpy()
set_output_tensor#
Description
Sets the RuntimeTensor of the output at the specified index.
definition
set_output_tensor(index, tensor)
Parameters
name |
type |
Description |
|---|---|---|
index |
int |
Index of output tensor |
tensor |
RuntimeTensor |
output tensor |
Return Value
None.
Example
sim.set_output_tensor(0, tensor)
run#
Description
Run kmodel inference.
definition
run()
Parameters
None.
Return Value
None.
Example
sim.run()
Example#
Precondition: The yolov5s_onnx.py script has compiled the yolov5s.onnx model.
yolov5s_onnx_simu.py is located in the src/rtsmart/libs/nncase/examples/scripts subdirectory, and the content is as follows:
import os
import copy
import argparse
import numpy as np
import onnx
import onnxruntime as ort
import nncase
def read_model_file(model_file):
    """Read a model file and return its raw contents.

    Args:
        model_file: Path of the file to read (for example a .kmodel file).

    Returns:
        bytes: The complete contents of the file, read in binary mode.
    """
    with open(model_file, 'rb') as f:
        return f.read()
def cosine(gt, pred):
    """Return the cosine similarity between two vectors.

    Computed as ``(gt @ pred) / (||gt||_2 * ||pred||_2)``. The caller in this
    script passes both arrays flattened to 1-D.

    Args:
        gt: Reference ("ground truth") vector.
        pred: Vector to compare against ``gt``; must have the same length.

    Returns:
        The cosine similarity of ``gt`` and ``pred``.
    """
    return (gt @ pred) / (np.linalg.norm(gt, 2) * np.linalg.norm(pred, 2))
def main():
    """Compare ONNX Runtime (CPU) outputs with nncase simulator outputs.

    Command-line arguments:
        --model: original model file, loaded with ONNX Runtime.
        --model_input: input bin file for the original model (read as float32).
        --kmodel: kmodel file, loaded into the nncase simulator.
        --kmodel_input: input bin file for the kmodel (read with the dtype
            reported by the simulator's first input descriptor).

    Prints the cosine similarity between each pair of corresponding outputs.
    """
    parser = argparse.ArgumentParser(prog="nncase")
    parser.add_argument("--model", type=str, help='original model file')
    parser.add_argument("--model_input", type=str, help='input bin file for original model')
    parser.add_argument("--kmodel", type=str, help='kmodel file')
    parser.add_argument("--kmodel_input", type=str, help='input bin file for kmodel')
    args = parser.parse_args()

    # cpu inference
    ort_session = ort.InferenceSession(args.model)
    output_names = [model_output.name for model_output in ort_session.get_outputs()]
    model_input = ort_session.get_inputs()[0]
    model_input_name = model_input.name
    model_input_type = np.float32
    model_input_shape = model_input.shape
    model_input_data = np.fromfile(args.model_input, model_input_type).reshape(model_input_shape)
    cpu_results = ort_session.run(output_names, {model_input_name: model_input_data})

    # create simulator
    sim = nncase.Simulator()

    # read kmodel
    kmodel = read_model_file(args.kmodel)

    # load kmodel
    sim.load_model(kmodel)

    # read input.bin
    # input_tensor=sim.get_input_tensor(0).to_numpy()
    dtype = sim.get_input_desc(0).dtype
    # The input shape is fixed to the 1x3x320x320 layout used by this yolov5s example.
    kmodel_input = np.fromfile(args.kmodel_input, dtype).reshape([1, 3, 320, 320])

    # set input for simulator
    sim.set_input_tensor(0, nncase.RuntimeTensor.from_numpy(kmodel_input))

    # simulator inference
    sim.run()
    # Each output is deep-copied; presumably so the saved results do not alias
    # simulator-owned buffers (verify against the nncase runtime).
    nncase_results = [
        copy.deepcopy(sim.get_output_tensor(i).to_numpy())
        for i in range(sim.outputs_size)
    ]

    # compare
    for i in range(sim.outputs_size):
        cos = cosine(np.reshape(nncase_results[i], (-1)), np.reshape(cpu_results[i], (-1)))
        print('output {0} cosine similarity : {1}'.format(i, cos))
# Run the comparison only when this file is executed as a script.
if __name__ == '__main__':
    main()
Execute inference script
root@5f718e19f8a7:/mnt/# cd rtos_sdk/src/rtsmart/libs/nncase/examples
root@5f718e19f8a7:/mnt/rtos_sdk/src/rtsmart/libs/nncase/examples # export PATH=$PATH:/usr/local/lib/python3.8/dist-packages/
root@5f718e19f8a7:/mnt/rtos_sdk/src/rtsmart/libs/nncase/examples # python3 scripts/yolov5s_onnx_simu.py --model models/yolov5s.onnx --model_input object_detect/data/input_fp32.bin --kmodel tmp/yolov5s_onnx/test.kmodel --kmodel_input object_detect/data/input_uint8.bin
The comparison between nncase simulator and CPU inference results is as follows
output 0 cosine similarity : 0.9997244477272034
output 1 cosine similarity : 0.999757707118988
output 2 cosine similarity : 0.9997308850288391
