123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354 |
- /**
- * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
- * Full license terms provided in LICENSE.md file.
- */
- #include <iostream>
- #include <string>
- #include <vector>
- #include <sstream>
- #include <chrono>
- #include <stdexcept>
- #include <fstream>
- #include <opencv2/opencv.hpp>
- #include <NvInfer.h>
- #define MS_PER_SEC 1000.0
- using namespace std;
- using namespace nvinfer1;
- class TestConfig;
- typedef void (*preprocess_fn_t)(float *input, size_t channels, size_t height, size_t width);
- float * imageToTensor(const cv::Mat & image);
- void preprocessVgg(float *input, size_t channels, size_t height, size_t width);
- void preprocessInception(float *input, size_t channels, size_t height, size_t width);
- size_t argmax(float *input, size_t numel);
- void test(const TestConfig &testConfig);
- class TestConfig
- {
- public:
- string imagePath;
- string planPath;
- string inputNodeName;
- string outputNodeName;
- string preprocessFnName;
- string inputHeight;
- string inputWidth;
- string numOutputCategories;
- string dataType;
- string maxBatchSize;
- string workspaceSize;
- string numRuns;
- string useMappedMemory;
- string statsPath;
-
- TestConfig(int argc, char * argv[])
- {
- imagePath = argv[1];
- planPath = argv[2];
- inputNodeName = argv[3];
- inputHeight = argv[4];
- inputWidth = argv[5];
- outputNodeName = argv[6];
- numOutputCategories = argv[7];
- preprocessFnName = argv[8];
- numRuns = argv[9];
- dataType = argv[10];
- maxBatchSize = argv[11];
- workspaceSize = argv[12];
- useMappedMemory = argv[13];
- statsPath = argv[14];
- }
- static string UsageString()
- {
- string s = "";
- s += "imagePath: \n";
- s += "planPath: \n";
- s += "inputNodeName: \n";
- s += "inputHeight: \n";
- s += "inputWidth: \n";
- s += "outputNodeName: \n";
- s += "numOutputCategories: \n";
- s += "preprocessFnName: \n";
- s += "numRuns: \n";
- s += "dataType: \n";
- s += "maxBatchSize: \n";
- s += "workspaceSize: \n";
- s += "useMappedMemory: \n";
- s += "statsPath: \n";
- return s;
- }
- string ToString()
- {
- string s = "";
- s += "imagePath: " + imagePath + "\n";
- s += "planPath: " + planPath + "\n";
- s += "inputNodeName: " + inputNodeName + "\n";
- s += "inputHeight: " + inputHeight + "\n";
- s += "inputWidth: " + inputWidth + "\n";
- s += "outputNodeName: " + outputNodeName + "\n";
- s += "numOutputCategories: " + numOutputCategories + "\n";
- s += "preprocessFnName: " + preprocessFnName + "\n";
- s += "numRuns: " + numRuns + "\n";
- s += "dataType: " + dataType + "\n";
- s += "maxBatchSize: " + maxBatchSize + "\n";
- s += "workspaceSize: " + workspaceSize + "\n";
- s += "useMappedMemory: " + useMappedMemory + "\n";
- s += "statsPath: " + statsPath + "\n";
- return s;
- }
- static int ToInteger(string value)
- {
- int valueInt;
- stringstream ss;
- ss << value;
- ss >> valueInt;
- return valueInt;
- }
- preprocess_fn_t PreprocessFn() const {
- if (preprocessFnName == "preprocess_vgg")
- return preprocessVgg;
- else if (preprocessFnName == "preprocess_inception")
- return preprocessInception;
- else
- throw runtime_error("Invalid preprocessing function name.");
- }
- int InputWidth() const { return ToInteger(inputWidth); }
- int InputHeight() const { return ToInteger(inputHeight); }
- int NumOutputCategories() const { return ToInteger(numOutputCategories); }
- nvinfer1::DataType DataType() const {
- if (dataType == "float")
- return nvinfer1::DataType::kFLOAT;
- else if (dataType == "half")
- return nvinfer1::DataType::kHALF;
- else
- throw runtime_error("Invalid data type.");
- }
-
- int MaxBatchSize() const { return ToInteger(maxBatchSize); }
- int WorkspaceSize() const { return ToInteger(workspaceSize); }
- int NumRuns() const { return ToInteger(numRuns); }
- int UseMappedMemory() const { return ToInteger(useMappedMemory); }
- };
- class Logger : public ILogger
- {
- void log(Severity severity, const char * msg) override
- {
- cout << msg << endl;
- }
- } gLogger;
- int main(int argc, char * argv[])
- {
- if (argc != 15)
- {
- cout << TestConfig::UsageString() << endl;
- return 0;
- }
- TestConfig testConfig(argc, argv);
- cout << "\ntestConfig: \n" << testConfig.ToString() << endl;
- test(testConfig);
- return 0;
- }
- float *imageToTensor(const cv::Mat & image)
- {
- const size_t height = image.rows;
- const size_t width = image.cols;
- const size_t channels = image.channels();
- const size_t numel = height * width * channels;
- const size_t stridesCv[3] = { width * channels, channels, 1 };
- const size_t strides[3] = { height * width, width, 1 };
- float * tensor;
- cudaHostAlloc((void**)&tensor, numel * sizeof(float), cudaHostAllocMapped);
- for (int i = 0; i < height; i++)
- {
- for (int j = 0; j < width; j++)
- {
- for (int k = 0; k < channels; k++)
- {
- const size_t offsetCv = i * stridesCv[0] + j * stridesCv[1] + k * stridesCv[2];
- const size_t offset = k * strides[0] + i * strides[1] + j * strides[2];
- tensor[offset] = (float) image.data[offsetCv];
- }
- }
- }
- return tensor;
- }
/**
 * Subtracts a fixed per-channel mean, in place, from a planar
 * (channel, row, column) float tensor.
 *
 * Only three channel means are defined, so more than three channels is
 * rejected with runtime_error instead of reading past the mean table.
 */
void preprocessVgg(float * tensor, size_t channels, size_t height, size_t width)
{
  const size_t kNumMeans = 3;
  const float mean[kNumMeans] = { 123.68f, 116.78f, 103.94f };

  if (channels > kNumMeans)
    throw std::runtime_error("preprocessVgg supports at most 3 channels.");

  // Each channel is one contiguous plane, so walk it linearly.
  const size_t planeSize = height * width;
  for (size_t k = 0; k < channels; k++)
  {
    float * plane = tensor + k * planeSize;
    for (size_t p = 0; p < planeSize; p++)
      plane[p] -= mean[k];
  }
}
/**
 * Rescales every element in place from the [0, 255] range to [-1, 1]
 * using 2 * (x / 255 - 0.5).
 *
 * The layout does not matter: all channels * height * width elements are
 * treated alike.
 */
void preprocessInception(float * tensor, size_t channels, size_t height, size_t width)
{
  const size_t numel = channels * height * width;
  // size_t index: an int counter would overflow for very large tensors.
  for (size_t i = 0; i < numel; i++)
  {
    // Evaluated in double and narrowed once, so results match the formula exactly.
    tensor[i] = static_cast<float>(2.0 * (tensor[i] / 255.0 - 0.5));
  }
}
/**
 * Returns the index of the largest element among the first `numel` values.
 *
 * When several elements share the maximum, the lowest index wins.
 * Returns 0 for an empty or null input.
 */
size_t argmax(float * tensor, size_t numel)
{
  if (tensor == nullptr || numel == 0)
    return 0;

  size_t maxIndex = 0;
  // Element 0 is the initial candidate, so the scan starts at 1.
  for (size_t i = 1; i < numel; i++)
  {
    if (tensor[i] > tensor[maxIndex])
      maxIndex = i;
  }
  return maxIndex;
}
- void test(const TestConfig &testConfig)
- {
- ifstream planFile(testConfig.planPath);
- stringstream planBuffer;
- planBuffer << planFile.rdbuf();
- string plan = planBuffer.str();
- IRuntime *runtime = createInferRuntime(gLogger);
- ICudaEngine *engine = runtime->deserializeCudaEngine((void*)plan.data(),
- plan.size(), nullptr);
- IExecutionContext *context = engine->createExecutionContext();
- int inputBindingIndex, outputBindingIndex;
- inputBindingIndex = engine->getBindingIndex(testConfig.inputNodeName.c_str());
- outputBindingIndex = engine->getBindingIndex(testConfig.outputNodeName.c_str());
- // load and preprocess image
- cv::Mat image = cv::imread(testConfig.imagePath, CV_LOAD_IMAGE_COLOR);
- cv::cvtColor(image, image, cv::COLOR_BGR2RGB, 3);
- cv::resize(image, image, cv::Size(testConfig.InputWidth(), testConfig.InputHeight()));
- float *input = imageToTensor(image);
- testConfig.PreprocessFn()(input, 3, testConfig.InputHeight(), testConfig.InputWidth());
- // allocate memory on host / device for input / output
- float *output;
- float *inputDevice;
- float *outputDevice;
- size_t inputSize = testConfig.InputHeight() * testConfig.InputWidth() * 3 * sizeof(float);
- cudaHostAlloc(&output, testConfig.NumOutputCategories() * sizeof(float), cudaHostAllocMapped);
- if (testConfig.UseMappedMemory())
- {
- cudaHostGetDevicePointer(&inputDevice, input, 0);
- cudaHostGetDevicePointer(&outputDevice, output, 0);
- }
- else
- {
- cudaMalloc(&inputDevice, inputSize);
- cudaMalloc(&outputDevice, testConfig.NumOutputCategories() * sizeof(float));
- }
- float *bindings[2];
- bindings[inputBindingIndex] = inputDevice;
- bindings[outputBindingIndex] = outputDevice;
- // run and compute average time over numRuns iterations
- double avgTime = 0;
- for (int i = 0; i < testConfig.NumRuns() + 1; i++)
- {
- chrono::duration<double> diff;
- if (testConfig.UseMappedMemory())
- {
- auto t0 = chrono::steady_clock::now();
- context->execute(1, (void**)bindings);
- auto t1 = chrono::steady_clock::now();
- diff = t1 - t0;
- }
- else
- {
- auto t0 = chrono::steady_clock::now();
- cudaMemcpy(inputDevice, input, inputSize, cudaMemcpyHostToDevice);
- context->execute(1, (void**)bindings);
- cudaMemcpy(output, outputDevice, testConfig.NumOutputCategories() * sizeof(float), cudaMemcpyDeviceToHost);
- auto t1 = chrono::steady_clock::now();
- diff = t1 - t0;
- }
- if (i != 0)
- avgTime += MS_PER_SEC * diff.count();
- }
- avgTime /= testConfig.NumRuns();
- // save results to file
- int maxCategoryIndex = argmax(output, testConfig.NumOutputCategories()) + 1001 - testConfig.NumOutputCategories();
- cout << "Most likely category id is " << maxCategoryIndex << endl;
- cout << "Average execution time in ms is " << avgTime << endl;
- ofstream outfile;
- outfile.open(testConfig.statsPath, ios_base::app);
- outfile << "\n" << testConfig.planPath
- << " " << avgTime;
- // << " " << maxCategoryIndex
- // << " " << testConfig.InputWidth()
- // << " " << testConfig.InputHeight()
- // << " " << testConfig.MaxBatchSize()
- // << " " << testConfig.WorkspaceSize()
- // << " " << testConfig.dataType
- // << " " << testConfig.NumRuns()
- // << " " << testConfig.UseMappedMemory();
- outfile.close();
- cudaFree(inputDevice);
- cudaFree(outputDevice);
- cudaFreeHost(input);
- cudaFreeHost(output);
- engine->destroy();
- context->destroy();
- runtime->destroy();
- }
|