inferutils.cpp

#include "inferutils.h"

#ifndef BATCH_SIZE
#define BATCH_SIZE 1 // fall back to a batch size of 1 if the build did not define BATCH_SIZE
#endif
bool zero_copy_malloc(void** cpu_ptr, void** gpu_ptr, size_t size)
{
    if (size == 0)
        return false;

    // Allocate mapped (zero-copy) host memory that is also visible to the GPU.
    cudaError_t alloc_err = cudaHostAlloc(cpu_ptr, size, cudaHostAllocMapped);
    if (alloc_err != cudaSuccess)
        return false;

    // Get the device-side pointer that aliases the mapped host allocation.
    cudaError_t err = cudaHostGetDevicePointer(gpu_ptr, *cpu_ptr, 0);
    if (err != cudaSuccess)
    {
        cudaFreeHost(*cpu_ptr); // avoid leaking the host allocation on failure
        return false;
    }

    memset(*cpu_ptr, 0, size);
    return true;
}
bool file_exists(std::string filename)
{
    FILE* ftr = fopen(filename.c_str(), "rb");
    if (!ftr)
    {
        return false;
    }
    fclose(ftr); // the handle was only opened to test for existence
    return true;
}
// estream and esize are passed by reference so the allocated buffer and its size reach the caller.
bool read_engine_from_disk(char*& estream, size_t& esize, const std::string enginepath)
{
    FILE* efile = fopen(enginepath.c_str(), "rb");
    if (!efile)
    {
        // file not found
        return false;
    }

    std::experimental::filesystem::path fp(enginepath);
    esize = std::experimental::filesystem::file_size(fp);
    if (esize == 0)
    {
        // invalid (empty) engine file
        fclose(efile);
        return false;
    }

    estream = (char*)malloc(esize);
    const size_t bytesread = fread(estream, 1, esize, efile);
    fclose(efile);
    if (bytesread != esize)
    {
        // corrupt or truncated file
        free(estream);
        estream = NULL;
        return false;
    }
    return true;
}
bool save_to_disk(nvinfer1::ICudaEngine* eg, std::string filename)
{
    nvinfer1::IHostMemory* serialized_engine = eg->serialize();
    if (!serialized_engine)
    {
        return false;
    }

    const char* edata = (char*)serialized_engine->data();
    const size_t esize = serialized_engine->size();

    FILE* diskfile = fopen(filename.c_str(), "wb");
    if (!diskfile)
    {
        serialized_engine->destroy();
        return false;
    }

    const bool ok = (fwrite(edata, 1, esize, diskfile) == esize);
    fclose(diskfile);
    serialized_engine->destroy();
    return ok;
}
uint32_t iobinding::get_size()
{
    uint32_t sz = BATCH_SIZE * sizeof(float);
    for (int i = 0; i < 4; i++)
    {
        sz *= std::max(1, dims.d[i]);
    }
    return sz;
}
void iobinding::allocate_buffers()
{
    if (!size)
        size = get_size();
    if (!zero_copy_malloc(&tCPU, &tGPU, size))
        throw std::runtime_error("Cannot allocate buffers for binding");
    cpu_ptr = (float*)tCPU;
    gpu_ptr = (float*)tGPU;
}

void iobinding::destroy_buffers()
{
    cudaFreeHost(tCPU);
}
nvinfer1::Dims validate_shift(const nvinfer1::Dims dims, bool shift=true)
{
    /* For compatibility with ONNX models in TensorRT 7.0 and above:
       drop the leading (implicit batch) dimension by shifting the remaining dims forward. */
    nvinfer1::Dims out_dims = dims;
    if (shift)
    {
        out_dims.d[0] = std::max(1, dims.d[1]);
        out_dims.d[1] = std::max(1, dims.d[2]);
        out_dims.d[2] = std::max(1, dims.d[3]);
        out_dims.d[3] = 1;
    }
    // Pad the unused dimensions with 1 so get_size() can multiply over a fixed range.
    for (int n = out_dims.nbDims; n < nvinfer1::Dims::MAX_DIMS; n++)
        out_dims.d[n] = 1;
    return out_dims;
}
iobinding get_engine_bindings(nvinfer1::ICudaEngine* eg, const char* name, bool is_onnx=true)
{
    iobinding io;
    io.binding = eg->getBindingIndex(name);
    if (io.binding < 0)
    {
        std::cout << "Could not find binding of name: " << name << std::endl;
        throw std::runtime_error("Binding not found error");
    }
    io.name = name;
    io.dims = validate_shift(eg->getBindingDimensions(io.binding), is_onnx);
    io.allocate_buffers();
    return io;
}
cudaStream_t create_cuda_stream(bool nonblocking)
{
    uint32_t flags = nonblocking ? cudaStreamNonBlocking : cudaStreamDefault;
    cudaStream_t stream = NULL;
    cudaError_t err = cudaStreamCreateWithFlags(&stream, flags);
    if (err != cudaSuccess)
        return NULL;
    //SetStream(stream);
    return stream;
}
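
/* Usage sketch (an assumption added for illustration, not part of the original source):
   one possible way the helpers above could be wired together around an already-built or
   deserialized engine. The binding names "input_0" and "output_0" and the guard macro
   INFERUTILS_USAGE_EXAMPLE are hypothetical placeholders; real binding names depend on
   the exported model. */
#ifdef INFERUTILS_USAGE_EXAMPLE
static void example_setup(nvinfer1::ICudaEngine* engine)
{
    // Look up the bindings and allocate their zero-copy host/device buffers.
    iobinding input  = get_engine_bindings(engine, "input_0", true);
    iobinding output = get_engine_bindings(engine, "output_0", true);

    // Non-blocking stream for enqueuing inference work.
    cudaStream_t stream = create_cuda_stream(true);

    // ... fill input.cpu_ptr, enqueue inference on an IExecutionContext,
    //     synchronize the stream, then read results from output.cpu_ptr ...

    cudaStreamDestroy(stream);
    input.destroy_buffers();
    output.destroy_buffers();
}
#endif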