// k1x-ai-support/src/processor/detection_preprocessor.cc
// 2024-04-15 11:42:57 +08:00
//
// 125 lines
// 4.2 KiB
// C++

#include "src/processor/detection_preprocessor.h"

#include <cstddef>
#include <utility>
#include <vector>

#include "utils/time.h"
#include "utils/utils.h"
/**
 * @brief Builds the planar float input tensor for a NanoDet-Plus model.
 *
 * Steps: resize with resizeUnscale() when the image size differs from the
 * model input, convert to float32, normalize every channel as
 * (pixel - mean) / scale, then append the three planes back to back
 * (channel 0, then 1, then 2) as one flat buffer.
 *
 * @param mat                 Source image. Must have exactly 3 channels.
 *                            NOTE(review): the mean/scale tables look like
 *                            BGR-ordered ImageNet statistics, so an 8-bit BGR
 *                            image is presumably expected - confirm with the
 *                            callers.
 * @param input_node_dims     Model input shapes. Only entry [0] is read:
 *                            index 2 is used as height, index 3 as width.
 * @param input_tensor_values Output list; exactly one tensor is appended on
 *                            success.
 *
 * Nothing is appended (the function just returns) when the image is empty,
 * when input_node_dims[0] has fewer than 4 entries, when the height or width
 * is not positive, or when the (resized) image does not have 3 channels.
 */
void DetectionPreprocessor::PreprocessNanoDetPlus(
    const cv::Mat& mat, std::vector<std::vector<int64_t>>& input_node_dims,
    std::vector<std::vector<float>>& input_tensor_values) {
  const int kChannels = 3;

  // Same early-out as Preprocess(): nothing to do for an empty image.
  if (mat.empty()) return;
  // Indices 2 and 3 are read below, so the shape needs at least 4 entries.
  if (input_node_dims.empty() || input_node_dims[0].size() < 4) return;

  const int input_height = static_cast<int>(input_node_dims[0][2]);
  const int input_width = static_cast<int>(input_node_dims[0][3]);
  if (input_height <= 0 || input_width <= 0) return;

  cv::Mat resizedImage;
  {
#ifdef DEBUG
    TimeWatcher t("| |-- Resize unscale");
#endif
    // cv::Mat::rows is the image height and cv::Mat::cols is its width.
    if (input_height != mat.rows || input_width != mat.cols) {
      resizeUnscale(mat, resizedImage, input_height, input_width);
    } else {
      resizedImage = mat;  // Header copy only; pixel data is shared.
    }
  }

  // cv::split() below writes one Mat per channel into a 3-element array, so
  // any other channel count must be rejected before reaching it.
  if (resizedImage.channels() != kChannels) return;

  cv::Mat floatImage;
  {
#ifdef DEBUG
    TimeWatcher t("| |-- Convert to fp32");
#endif
    // Separate destination so the caller's pixel buffer is never aliased.
    resizedImage.convertTo(floatImage, CV_32F, 1.0);
  }

  {
#ifdef DEBUG
    TimeWatcher t("| |-- Normalize");
#endif
    cv::Mat channels[kChannels];
    cv::split(floatImage, channels);

    const float mean_vals[kChannels] = {103.53f, 116.28f, 123.675f};
    const float scale_vals[kChannels] = {57.375f, 57.12f, 58.395f};

    std::vector<float> input_tensor_value;
    input_tensor_value.reserve(floatImage.total() *
                               static_cast<std::size_t>(kChannels));
    for (int i = 0; i < kChannels; ++i) {
      channels[i] = (channels[i] - mean_vals[i]) / (scale_vals[i]);
      // reshape(1, 1) is a 1 x (H*W) view of the plane; append straight from
      // its buffer instead of going through a temporary std::vector.
      const cv::Mat plane = channels[i].reshape(1, 1);
      const float* first = plane.ptr<float>(0);
      input_tensor_value.insert(input_tensor_value.end(), first,
                                first + plane.total());
    }
    input_tensor_values.push_back(std::move(input_tensor_value));
  }
}
/**
 * @brief Builds the float input tensor for a detection model from an image.
 *
 * The image is resized with resizeUnscale() when its size differs from the
 * model input, converted from BGR to RGB and scaled by 1/255 into float32.
 * The result is then flattened according to data_format:
 *   - data_format == 1: the interleaved (pixel by pixel) buffer is copied
 *     as is; height/width are read from input_node_dims[0][1] / [0][2].
 *   - any other value: the three channel planes are appended back to back;
 *     height/width are read from input_node_dims[0][2] / [0][3].
 * No mean/std normalization is applied in either case.
 *
 * @param mat                 Source image. NOTE(review): presumably 8-bit
 *                            BGR, given the BGR->RGB swap and the 1/255
 *                            scale - confirm with the callers.
 * @param input_node_dims     Model input shapes; only entry [0] is read.
 * @param input_tensor_values Output list; exactly one tensor is appended on
 *                            success.
 * @param data_format         1 selects the interleaved layout, anything else
 *                            the planar layout.
 *
 * Nothing is appended (the function just returns) when the image is empty,
 * when input_node_dims[0] is too short for the selected layout, when the
 * height or width is not positive, or - interleaved layout only - when the
 * converted image does not hold exactly 3 * height * width floats.
 */
void DetectionPreprocessor::Preprocess(
    const cv::Mat& mat, std::vector<std::vector<int64_t>>& input_node_dims,
    std::vector<std::vector<float>>& input_tensor_values,
    unsigned int data_format) {
  if (mat.empty()) return;

  // The position of height/width inside the shape depends on the layout.
  const bool interleaved = (data_format == 1);
  const std::size_t height_index = interleaved ? 1 : 2;
  const std::size_t width_index = height_index + 1;
  if (input_node_dims.empty() || input_node_dims[0].size() <= width_index) {
    return;
  }

  const int input_height = static_cast<int>(input_node_dims[0][height_index]);
  const int input_width = static_cast<int>(input_node_dims[0][width_index]);
  if (input_height <= 0 || input_width <= 0) return;

  // Computed in size_t so large inputs cannot overflow int arithmetic.
  const std::size_t target_tensor_size =
      static_cast<std::size_t>(3) * static_cast<std::size_t>(input_height) *
      static_cast<std::size_t>(input_width);

  // resize & unscale
  cv::Mat resizedImageBGR;
  {
#ifdef DEBUG
    TimeWatcher t("| |-- Resize unscale");
#endif
    // cv::Mat::rows is the image height and cv::Mat::cols is its width.
    if (input_height != mat.rows || input_width != mat.cols) {
      resizeUnscale(mat, resizedImageBGR, input_height, input_width);
    } else {
      resizedImageBGR = mat;  // Header copy only; pixel data is shared.
    }
  }

  cv::Mat resizedImageRGB, resizedImage;
  std::vector<float> input_tensor_value;

  if (interleaved) {
    cv::cvtColor(resizedImageBGR, resizedImageRGB, cv::COLOR_BGR2RGB);
    resizedImageRGB.convertTo(resizedImage, CV_32F, 1.0 / 255);

    // The raw copy below reads target_tensor_size floats from one contiguous
    // buffer; refuse to read past the end if the image does not match.
    const std::size_t available =
        resizedImage.total() *
        static_cast<std::size_t>(resizedImage.channels());
    if (!resizedImage.isContinuous() || available != target_tensor_size) {
      return;
    }
    const float* pixels = resizedImage.ptr<float>(0);
    input_tensor_value.assign(pixels, pixels + target_tensor_size);
  } else {
    {
#ifdef DEBUG
      TimeWatcher t("| |-- Convert to RGB");
#endif
      // Swap BGR -> RGB; the pixel layout stays interleaved at this point.
      cv::cvtColor(resizedImageBGR, resizedImageRGB, cv::COLOR_BGR2RGB);
    }
    {
#ifdef DEBUG
      TimeWatcher t("| |-- Convert to fp32");
#endif
      resizedImageRGB.convertTo(resizedImage, CV_32F, 1.0 / 255);
    }
    {
#ifdef DEBUG
      TimeWatcher t("| |-- Normalize");
#endif
      // Only the layout changes here (interleaved -> planar); per-channel
      // mean/scale normalization is intentionally not applied.
      cv::Mat channels[3];
      cv::split(resizedImage, channels);

      input_tensor_value.reserve(target_tensor_size);
      for (int i = 0; i < 3; ++i) {
        // reshape(1, 1) is a 1 x (H*W) view of the plane; append straight
        // from its buffer instead of going through a temporary std::vector.
        const cv::Mat plane = channels[i].reshape(1, 1);
        const float* first = plane.ptr<float>(0);
        input_tensor_value.insert(input_tensor_value.end(), first,
                                  first + plane.total());
      }
    }
  }

  input_tensor_values.push_back(std::move(input_tensor_value));
}