MC3302_SDK_V1.1.9_202507281.../media/sample/modules/npu/main.c
2025-11-11 12:08:31 +08:00

470 lines
17 KiB
C
Executable File

#include <assert.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include "sys.h"
#include "npu.h"
// #include "types/vmm_api.h"
#include "common.h"
// Capacity of stNpuOP.inImageVec, i.e. the most model inputs this sample can describe.
#define MAX_MDL_IN_NUM (3)
// Output-count counterpart of MAX_MDL_IN_NUM; not referenced by the code visible in this file.
#define MAX_MDL_OUT_NUM (3)
// Size in bytes of the fixed path buffers in stImage and stNpuOP.
#define MAX_PATH_LEN (1024)
// Describes one input file that is loaded into a model input buffer.
typedef struct{
// Path of the file that prepareModelInOutCache() reads into the input buffer.
char path[MAX_PATH_LEN];
// Picture width; copied into the input blob's picWidth, stride and ROI width.
uint32_t width;
// Picture height; copied into the input blob's picHeight, stride and ROI height.
uint32_t height;
// Pixel format; copied into the input blob's picFormat and used to size the buffer.
E_TY_PixelFormat fmt;
}stImage;
// Everything the sample needs to know about one inference run.
typedef struct{
// Path of the model file that main() loads into memory.
char mdlPath[MAX_PATH_LEN];
// One entry per model input; only the first inImgNum entries are used.
stImage inImageVec[MAX_MDL_IN_NUM];
// Number of valid entries in inImageVec; asserted to equal the model's input count.
uint32_t inImgNum;
// Not referenced by the code visible in this file.
char *outPath;
}stNpuOP;
// Logs the model's version and compile date, then every input and output
// blob descriptor via printBlob().
static void printMdlDesc(T_TY_ModelDesc *desc){
    const uint32_t inCount = desc->ioDesc.inputNum;
    const uint32_t outCount = desc->ioDesc.outputNum;
    uint32_t blobIdx;

    INFO_LOG("model desc: version:%s", desc->info.version);
    INFO_LOG("model desc: compile:%s", desc->info.compileDate);

    INFO_LOG("model desc: input number:%d", inCount);
    for(blobIdx = 0; blobIdx < inCount; blobIdx++){
        INFO_LOG("input blob#%d", blobIdx);
        printBlob(&desc->ioDesc.in[blobIdx]);
    }

    INFO_LOG("model desc: output number:%d", outCount);
    for(blobIdx = 0; blobIdx < outCount; blobIdx++){
        INFO_LOG("output blob#%d", blobIdx);
        printBlob(&desc->ioDesc.out[blobIdx]);
    }
}
// Returns the number of input blobs declared in the model description.
static uint32_t getMdlInputNum(T_TY_ModelDesc *desc){
    uint32_t inputCount = desc->ioDesc.inputNum;

    return inputCount;
}
// Returns the number of output blobs declared in the model description.
static uint32_t getMdlOutputNum(T_TY_ModelDesc *desc){
    uint32_t outputCount = desc->ioDesc.outputNum;

    return outputCount;
}
// Releases the data buffer of every task input and output with freeMem(),
// then frees the two descriptor arrays themselves with free().
// After this call neither inVec nor outVec may be used again.
static void freeTaskInOutBuf(T_TY_TaskInput inVec[], uint32_t inNum, T_TY_TaskOutput outVec[], uint32_t outNum){
    uint32_t n;

    for(n = 0; n < inNum; n++){
        T_TY_TaskInput *in = &inVec[n];
        freeMem(in->dataIn.virAddr, in->dataIn.phyAddr, in->dataIn.size);
    }

    for(n = 0; n < outNum; n++){
        T_TY_TaskOutput *out = &outVec[n];
        freeMem(out->dataOut.virAddr, out->dataOut.phyAddr, out->dataOut.size);
    }

    // The arrays were heap-allocated by the caller; ownership ends here.
    free(inVec);
    free(outVec);
}
// Fills in the task input/output descriptors for one inference run and
// allocates their data buffers.
//
// For every model input: copies the blob descriptor, works out the buffer
// size (plain data blob, or an image blob described by npuOp->inImageVec),
// allocates the buffer, loads the input file into it and flushes it.
// For every model output: allocates a buffer of the blob's size and flushes it.
//
// desc    model description supplying the blob descriptors
// npuOp   supplies the per-input image geometry and file path
// inVec   array of inNum task inputs to fill (inNum must equal the model's input count)
// outVec  array of outNum task outputs to fill (outNum must equal the model's output count)
static void prepareModelInOutCache(T_TY_ModelDesc *desc, stNpuOP *npuOp, T_TY_TaskInput inVec[], uint32_t inNum, T_TY_TaskOutput outVec[], uint32_t outNum){
    uint32_t n;

    assert(desc->ioDesc.inputNum == npuOp->inImgNum);
    assert(desc->ioDesc.inputNum == inNum);
    assert(desc->ioDesc.outputNum== outNum);

    for(n = 0; n < desc->ioDesc.inputNum; n++){
        T_TY_TaskInput *in = &inVec[n];
        stImage *img = &npuOp->inImageVec[n];
        T_TY_BlobDesc *blob = &desc->ioDesc.in[n];

        in->descIn = *blob;
        if(E_TY_BLOB_DATA == in->descIn.type){
            in->dataIn.size = getBlobSize(blob);
        }else if(E_TY_BLOB_IMAGE_WITH_PRE_PROC == in->descIn.type){
            // The blob is re-typed as a plain image described by the
            // geometry of the file that is about to be loaded.
            in->descIn.type = E_TY_BLOB_IMAGE;
            in->descIn.img.picFormat = img->fmt;
            in->descIn.img.picWidth = img->width;
            in->descIn.img.picHeight = img->height;
            // Fill in according to the picture's actual stride.
            in->descIn.img.picWidthStride = in->descIn.img.picWidth;
            in->descIn.img.picHeightStride= in->descIn.img.picHeight;
            // The region of interest covers the whole picture.
            in->descIn.img.roi.x = 0;
            in->descIn.img.roi.y = 0;
            in->descIn.img.roi.width = in->descIn.img.picWidth;
            in->descIn.img.roi.height= in->descIn.img.picHeight;
            in->dataIn.size = getImageSize(in->descIn.img.picWidth, in->descIn.img.picHeight, in->descIn.img.picFormat);
        }else{
            // TBD: a model whose original input is a plain image is not handled yet.
            assert(0);
        }

        mallocMem((void**)&in->dataIn.virAddr, &in->dataIn.phyAddr, in->dataIn.size, 8, E_TY_MEM_VMM_CACHED);
        readFile(&in->dataIn, img->path);
        flushVmmMem(in->dataIn.virAddr, in->dataIn.phyAddr, in->dataIn.size);
    }

    for(n = 0; n < desc->ioDesc.outputNum; n++){
        T_TY_TaskOutput *result = &outVec[n];

        result->dataOut.size = getBlobSize(&desc->ioDesc.out[n]);
        mallocMem((void**)&result->dataOut.virAddr, &result->dataOut.phyAddr, result->dataOut.size, 8, E_TY_MEM_VMM_CACHED);
        flushVmmMem(result->dataOut.virAddr, result->dataOut.phyAddr, result->dataOut.size);
    }
}
// Copies line number lineIdx (0-based) of the text file at path into line.
//
// path     file to read
// lineIdx  0-based index of the wanted line
// line     destination buffer; must hold at least 1024 bytes. It receives
//          the line including its trailing '\n' (if the file has one), or
//          an empty string when the call fails.
//
// Returns 0 when the line was found, -1 when the file cannot be opened or
// has no such line.
//
// Lines are read in chunks of at most 1023 characters, so a longer physical
// line counts as several lines.
static int32_t getFileLine(char *path, int32_t lineIdx, char line[]){
    enum { LINE_BUF_LEN = 1024 };
    char tmp[LINE_BUF_LEN] = {'\0'};
    int32_t idx = 0;
    int32_t found = 0;
    FILE *fp;

    line[0] = '\0';
    fp = fopen(path, "r");
    if(NULL == fp){
        ERROR_LOG("open file error, path:%s", path);
        return -1;
    }

    // Loop on the fgets() result rather than feof(): feof() only turns true
    // after a read has already failed, at which point tmp still holds the
    // previous line and must not be handed out as line lineIdx.
    while(NULL != fgets(tmp, LINE_BUF_LEN, fp)){
        if(idx == lineIdx){
            memcpy(line, tmp, strlen(tmp) + 1);
            found = 1;
            break;
        }
        idx++;
    }
    fclose(fp);

    if(!found){
        ERROR_LOG("line %d not found, path:%s", lineIdx, path);
        return -1;
    }
    return 0;
}
// One class score paired with the class index it belongs to.
typedef struct{
    int32_t idx;
    float prob;
}stOutItem;

// Returns non-zero when a must be listed before b: the higher prob wins,
// and equal probs are ordered by ascending class index.
static int outItemRanksBefore(const stOutItem *a, const stOutItem *b){
    if(a->prob > b->prob){
        return 1;
    }
    return (a->prob == b->prob) && (a->idx < b->idx);
}

// Moves the topN best-ranked entries of items[0..count) to the front, best
// first. The order of the remaining entries is unspecified.
static void selectTopItems(stOutItem *items, int32_t count, int32_t topN){
    int32_t slot;

    for(slot = 0; (slot < topN) && (slot < count); slot++){
        int32_t best = slot;
        int32_t j;

        for(j = slot + 1; j < count; j++){
            if(outItemRanksBefore(&items[j], &items[best])){
                best = j;
            }
        }
        if(best != slot){
            stOutItem tmp = items[slot];
            items[slot] = items[best];
            items[best] = tmp;
        }
    }
}

// Classification post-processing: interprets the single output buffer as
// 1000 float scores and logs the five highest ones.
//
// When the label file ./npu/in_picture/output.txt can be opened, the line
// whose 0-based number equals the class index is logged as the label;
// otherwise only the class indices are logged.
//
// outVec  task outputs; outVec[0] must hold exactly 1000 floats
// outNum  number of entries in outVec; must be 1
static void caffeSqueezenetPostproc(T_TY_TaskOutput outVec[], uint32_t outNum){
    enum { SQZ_CLASS_NUM = 1000, SQZ_TOP_N = 5 };
    // The label file location is fixed; the former POST_LABEL_FILE
    // environment lookup is disabled in this sample.
    char *labelPath = "./npu/in_picture/output.txt";
    const float *scores;
    stOutItem *items;
    FILE *fp;
    int haveLabels;
    int32_t i;

    assert(1 == outNum);
    assert(SQZ_CLASS_NUM * sizeof(float) == outVec[0].dataOut.size);

    items = malloc(sizeof(*items) * SQZ_CLASS_NUM);
    if(NULL == items){
        ERROR_LOG("malloc post-process buffer error, count %d", (int32_t)SQZ_CLASS_NUM);
        return;
    }

    scores = (const float*)outVec[0].dataOut.virAddr;
    for(i = 0; i < SQZ_CLASS_NUM; i++){
        items[i].idx = i;
        items[i].prob = scores[i];
    }

    // Only the best SQZ_TOP_N entries are reported, so a full sort is not needed.
    selectTopItems(items, SQZ_CLASS_NUM, SQZ_TOP_N);

    // Probe whether the label file is readable. getFileLine() opens the file
    // itself, so the probe handle is closed straight away.
    fp = fopen(labelPath, "r");
    haveLabels = (NULL != fp);
    if(haveLabels){
        fclose(fp);
    }else{
        ERROR_LOG("open file error, path:%s", labelPath);
        INFO_LOG("only print out top5 index");
    }

    for(i = 0; i < SQZ_TOP_N; i++){
        if(haveLabels){
            char label[1024] = {'\0'};

            getFileLine(labelPath, items[i].idx, label);
            // Cut at the first line terminator. Unlike indexing with
            // strlen(label)-1, this is safe for an empty label and for a
            // last line that has no trailing newline.
            label[strcspn(label, "\r\n")] = '\0';
            INFO_LOG("TOP%d, prob %f labelIdx %d label %s", i, items[i].prob, items[i].idx, label);
        }else{
            INFO_LOG("TOP%d, prob %f labelIdx %d", i, items[i].prob, items[i].idx);
        }
    }

    free(items);
}
int main(int argc, char* argv[]){
int syncMode = 1;
uint32_t queryPeriod = 1; //ms
if((argc != 1) && (argc != 2)){
INFO_LOG("Useage: ./%s -----sync mode", argv[0]);
INFO_LOG("Useage: ./%s 1 -----async mode, block query", argv[0]);
INFO_LOG("Useage: ./%s 0 -----async mode, periodical query, period 100ms", argv[0]);
return -1;
}
if(argc == 1){
INFO_LOG("sync mode");
syncMode = 1;
}else{
INFO_LOG("async mode");
syncMode = 0;
queryPeriod = atoi(argv[1]) == 0 ? 100 : 0;
}
int32_t tyRet;
tyRet = TY_NPU_SysInit();
if(TY_NPU_SUCCESS != tyRet){
ERROR_LOG("TY_NPU_SysInit fail, errCode:%d", tyRet);
return -1;
}
INFO_LOG("TY_NPU_SysInit done");
uint32_t loglvl =2;
tyRet = TY_SDK_SetLogLevel(loglvl);
if(TY_NPU_SUCCESS != tyRet){
ERROR_LOG("TY_SDK_SetLogLevel fail, errCode:%d", tyRet);
TY_NPU_SysExit();
return -1;
}
INFO_LOG("TY_SDK_SetLogLevelexamplesne, log level:%d", loglvl);
int32_t majorVersion;
int32_t minorVersion;
int32_t patchVersion;
tyRet = TY_SDK_GetVersion(&majorVersion, &minorVersion, &patchVersion);
if(TY_NPU_SUCCESS != tyRet){
ERROR_LOG("TY_SDK_GetVersion fail, errCode:%d", tyRet);
TY_NPU_SysExit();
return -1;
}
INFO_LOG("TY_SDK_GetVersion done, tyhcp version: v%d.%d.%d", majorVersion, minorVersion, patchVersion);
// get the path of models and data
// char *envMdlPath = getenv("MDL_PATH");
// char *envDataPath= getenv("DATA_PATH");
// if((NULL == envDataPath) || (NULL == envMdlPath)){
// ERROR_LOG("please set env: MDL_PATH and DATA_PATH, eg:");
// ERROR_LOG("export MDL_PATH=/DEngine/tyexamples/models/nnp310/");
// ERROR_LOG("export DATA_PATH=/DEngine/tyexamples/data/bin/");
// TY_NPU_SysExit();
// return -1;
// }
// npuOp is the size number and path of picture
stNpuOP npuOp;
strncpy(npuOp.mdlPath, "./res/net_combine.bin", MAX_PATH_LEN);
// strncpy(npuOp.mdlPath + strlen(npuOp.mdlPath), "/nfs/xc01nna/npu/caffe_squeezenet_v1.1/net_combine.bin", MAX_PATH_LEN - strlen(npuOp.mdlPath));
// strncpy(npuOp.mdlPath + strlen(npuOp.mdlPath), "tf_squeeznet/net_combine.bin", MAX_PATH_LEN - strlen(npuOp.mdlPath));
npuOp.inImgNum = 1;
npuOp.inImageVec[0].width = 512;
npuOp.inImageVec[0].height= 288;
npuOp.inImageVec[0].fmt = E_TY_PIXEL_FORMAT_RGB_888_PLANAR;//E_TY_PIXEL_FORMAT_YUV_SEMIPLANAR_420;
strncpy(npuOp.inImageVec[0].path, "./res/iss_in0_3x288x512", MAX_PATH_LEN);
// strncpy(npuOp.inImageVec[0].path + strlen(npuOp.inImageVec[0].path), "n02105855_sheepdog_498x374_NV12.yuv", MAX_PATH_LEN - strlen(npuOp.inImageVec[0].path));
// strncpy(npuOp.inImageVec[0].path + strlen(npuOp.inImageVec[0].path), "/nfs/xc01nna/npu/in_picture/iss_in0_3x288x512", MAX_PATH_LEN - strlen(npuOp.inImageVec[0].path));
// define memory allocation method
T_TY_MemSegmentInfo mdlMemInfo;
mdlMemInfo.segNum = 1;
mdlMemInfo.memInfo[0].allocInfo.alignByteSize = 128;
mdlMemInfo.memInfo[0].allocInfo.allocType = E_TY_MEM_VMM_CACHED;
mdlMemInfo.memInfo[0].allocInfo.shareType = E_MEM_EXCLUSIVED;
mdlMemInfo.memInfo[0].allocInfo.size = GetFileSize(npuOp.mdlPath);
allocMemSegments(&mdlMemInfo);
printMemSegments(&mdlMemInfo);
readFile(&mdlMemInfo.memInfo[0].mem, npuOp.mdlPath);
flushVmmMem(mdlMemInfo.memInfo[0].mem.virAddr, mdlMemInfo.memInfo[0].mem.phyAddr, mdlMemInfo.memInfo[0].mem.size);
T_TY_ModelCfgParam mdlCfg;
T_TY_ModelDesc mdlDesc;
TY_NPU_MODEL_HANDLE mdlHdl;
tyRet = TY_NPU_CreateModelFromPhyMem(&mdlMemInfo, &mdlCfg, &mdlMemInfo, &mdlDesc, &mdlHdl);
if(TY_NPU_SUCCESS != tyRet){
ERROR_LOG("TY_NPU_CreateModel fail, errCode:%d", tyRet);
TY_NPU_SysExit();
freeMemSegments(&mdlMemInfo);
return -1;
}
INFO_LOG("TY_NPU_CreateModel done, handle:%p", mdlHdl);
printMdlDesc(&mdlDesc);
T_TY_TaskCfgParam tskCfg;
T_TY_MemSegmentInfo tskMemInfo;
tyRet = TY_NPU_GetTaskMemSize(mdlHdl, &tskCfg, &tskMemInfo);
if(TY_NPU_SUCCESS != tyRet){
ERROR_LOG("TY_NPU_GetTaskMemSize fail, errCode:%d", tyRet);
TY_NPU_ReleaseModel(mdlHdl);
TY_NPU_SysExit();
freeMemSegments(&mdlMemInfo);
return -1;
}
INFO_LOG("TY_NPU_GetTaskMemSize done");
allocMemSegments(&tskMemInfo);
printMemSegments(&tskMemInfo);
TY_NPU_TASK_HANDLE tskHdl;
tyRet = TY_NPU_CreateTask(mdlHdl, &tskCfg, &tskMemInfo, &tskHdl);
if(TY_NPU_SUCCESS != tyRet){
ERROR_LOG("TY_NPU_CreateTask fail, errCode:%d", tyRet);
TY_NPU_ReleaseModel(mdlHdl);
TY_NPU_SysExit();
freeMemSegments(&tskMemInfo);
freeMemSegments(&mdlMemInfo);
return -1;
}
INFO_LOG("TY_NPU_CreateTask done");
E_TY_NpuID npuId = E_TY_NPU_ID_0;
uint32_t mdlInNum = getMdlInputNum(&mdlDesc);
uint32_t mdlOutNum= getMdlOutputNum(&mdlDesc);
T_TY_TaskInput *taskInVec = malloc(sizeof(T_TY_TaskInput) * mdlInNum);
T_TY_TaskOutput *taskOutVec = malloc(sizeof(T_TY_TaskOutput) * mdlOutNum);
if((NULL == taskInVec) || (NULL == taskOutVec)){
ERROR_LOG("malloc input and output error, inNum %d, outNum:%d", mdlInNum, mdlOutNum);
TY_NPU_ReleaseTask(tskHdl);
TY_NPU_ReleaseModel(mdlHdl);
TY_NPU_SysExit();
freeMemSegments(&tskMemInfo);
freeMemSegments(&mdlMemInfo);
if(taskInVec != NULL) free(taskInVec);
if(taskOutVec!= NULL) free(taskOutVec);
return -1;
}
prepareModelInOutCache(&mdlDesc, &npuOp, taskInVec, mdlInNum, taskOutVec, mdlOutNum);
INFO_LOG("prepareModelInOutCache done");
if(syncMode){
// while(1){
tyRet = TY_NPU_Forward(tskHdl, npuId, mdlInNum, taskInVec, mdlOutNum, taskOutVec);
if(TY_NPU_SUCCESS != tyRet){
ERROR_LOG("TY_NPU_Forward fail, errCode:%d", tyRet);
TY_NPU_ReleaseTask(tskHdl);
TY_NPU_ReleaseModel(mdlHdl);
TY_NPU_SysExit();
freeMemSegments(&mdlMemInfo);
freeMemSegments(&tskMemInfo);
freeTaskInOutBuf(taskInVec, mdlInNum, taskOutVec, mdlOutNum);
return -1;
}
INFO_LOG("TY_NPU_Forward done");
// tyRet = SAMPLE_COMM_JPEGE_SaveStream("/nfs/xc01nna/npu/out_picture/out_data", taskOutVec);
writeFile(&taskOutVec->dataOut, "./out_picture/out_data");
// if(TY_NPU_SUCCESS != tyRet){
// ERROR_LOG("write fail, errCode:%d", tyRet);
// }else{
// printf("out file path = '/nfs/xc01nna/npu/out_picture/out_data'");
// }
// system("cat /proc/media-mem");
// caffeSqueezenetPostproc(taskOutVec, mdlOutNum);
// }
}
else{
TY_NPU_NN_HANDLE nnHandle;
tyRet = TY_NPU_ForwardAsync(tskHdl, npuId, mdlInNum, taskInVec, mdlOutNum, taskOutVec, &nnHandle);
if(TY_NPU_SUCCESS != tyRet){
ERROR_LOG("TY_NPU_ForwardAsync fail, errCode:%d", tyRet);
TY_NPU_ReleaseTask(tskHdl);
TY_NPU_ReleaseModel(mdlHdl);
TY_NPU_SysExit();
freeMemSegments(&mdlMemInfo);
freeMemSegments(&tskMemInfo);
freeTaskInOutBuf(taskInVec, mdlInNum, taskOutVec, mdlOutNum);
return -1;
}
INFO_LOG("TY_NPU_ForwardAsync send task done");
if(0 == queryPeriod){
int32_t block = 1;
E_TY_Bool status = E_TY_FALSE;
tyRet = TY_NPU_Query(npuId, nnHandle, &status, block);
if(TY_NPU_SUCCESS != tyRet){
ERROR_LOG("TY_NPU_Query fail, errCode:%d", tyRet);
TY_NPU_ReleaseTask(tskHdl);
TY_NPU_ReleaseModel(mdlHdl);
TY_NPU_SysExit();
freeMemSegments(&mdlMemInfo);
freeMemSegments(&tskMemInfo);
freeTaskInOutBuf(taskInVec, mdlInNum, taskOutVec, mdlOutNum);
return -1;
}
INFO_LOG("TY_NPU_Query(block mode) done");
caffeSqueezenetPostproc(taskOutVec, mdlOutNum);
}else{
int32_t block = 0;
E_TY_Bool status = E_TY_FALSE;
int32_t waitTime = 0;
while(status != E_TY_TRUE){
waitTime += queryPeriod;
usleep(queryPeriod*1000);
tyRet = TY_NPU_Query(npuId, nnHandle, &status, block);
if(TY_NPU_SUCCESS != tyRet){
ERROR_LOG("TY_NPU_Query fail, errCode:%d", tyRet);
TY_NPU_ReleaseTask(tskHdl);
TY_NPU_ReleaseModel(mdlHdl);
TY_NPU_SysExit();
freeMemSegments(&mdlMemInfo);
freeMemSegments(&tskMemInfo);
freeTaskInOutBuf(taskInVec, mdlInNum, taskOutVec, mdlOutNum);
return -1;
}
INFO_LOG("TY_NPU_Query(period mode), wait waitTime:%d(ms)", waitTime);
}
INFO_LOG("TY_NPU_Query(period mode) done");
caffeSqueezenetPostproc(taskOutVec, mdlOutNum);
}
}
tyRet = TY_NPU_ReleaseTask(tskHdl);
if(TY_NPU_SUCCESS != tyRet){
ERROR_LOG("TY_NPU_ReleaseTask fail, errCode:%d", tyRet);
TY_NPU_ReleaseModel(mdlHdl);
TY_NPU_SysExit();
freeMemSegments(&mdlMemInfo);
freeMemSegments(&tskMemInfo);
freeTaskInOutBuf(taskInVec, mdlInNum, taskOutVec, mdlOutNum);
return -1;
}
INFO_LOG("TY_NPU_ReleaseTask done");
tyRet = TY_NPU_ReleaseModel(mdlHdl);
if(TY_NPU_SUCCESS != tyRet){
ERROR_LOG("TY_NPU_ReleaseModel fail, errCode:%d", tyRet);
TY_NPU_SysExit();
freeMemSegments(&mdlMemInfo);
freeMemSegments(&tskMemInfo);
freeTaskInOutBuf(taskInVec, mdlInNum, taskOutVec, mdlOutNum);
return -1;
}
INFO_LOG("TY_NPU_ReleaseModel done");
freeMemSegments(&mdlMemInfo);
freeMemSegments(&tskMemInfo);
freeTaskInOutBuf(taskInVec, mdlInNum, taskOutVec, mdlOutNum);
tyRet = TY_NPU_SysExit();
if(TY_NPU_SUCCESS != tyRet){
ERROR_LOG("TY_NPU_SysExit fail, errCode:%d", tyRet);
return -1;
}
INFO_LOG("TY_NPU_SysExit done");
return 0;
}