更换文档检测模型

This commit is contained in:
2024-08-27 14:42:45 +08:00
parent aea6f19951
commit 1514e09c40
2072 changed files with 254336 additions and 4967 deletions

View File

@@ -0,0 +1,32 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "include/config_parser.h"
namespace PaddleDetection {
void load_jsonf(std::string jsonfile, Json::Value &jsondata) {
std::ifstream ifs;
ifs.open(jsonfile);
Json::CharReaderBuilder builder;
builder["collectComments"] = true;
JSONCPP_STRING errs;
if (!parseFromStream(builder, ifs, &jsondata, &errs)) {
std::cout << errs << std::endl;
return;
}
}
} // namespace PaddleDetection

View File

@@ -0,0 +1,224 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <sstream>
// for setprecision
#include <chrono>
#include <iomanip>
#include "include/keypoint_detector.h"
namespace PaddleDetection {
// Load Model and create model predictor
void KeyPointDetector::LoadModel(std::string model_file, int num_theads) {
MobileConfig config;
config.set_threads(num_theads);
config.set_model_from_file(model_file + "/model.nb");
config.set_power_mode(LITE_POWER_HIGH);
predictor_ = std::move(CreatePaddlePredictor<MobileConfig>(config));
}
// Visualiztion MaskDetector results
cv::Mat VisualizeKptsResult(const cv::Mat& img,
const std::vector<KeyPointResult>& results,
const std::vector<int>& colormap,
float threshold) {
const int edge[][2] = {{0, 1},
{0, 2},
{1, 3},
{2, 4},
{3, 5},
{4, 6},
{5, 7},
{6, 8},
{7, 9},
{8, 10},
{5, 11},
{6, 12},
{11, 13},
{12, 14},
{13, 15},
{14, 16},
{11, 12}};
cv::Mat vis_img = img.clone();
for (int batchid = 0; batchid < results.size(); batchid++) {
for (int i = 0; i < results[batchid].num_joints; i++) {
if (results[batchid].keypoints[i * 3] > threshold) {
int x_coord = int(results[batchid].keypoints[i * 3 + 1]);
int y_coord = int(results[batchid].keypoints[i * 3 + 2]);
cv::circle(vis_img,
cv::Point2d(x_coord, y_coord),
1,
cv::Scalar(0, 0, 255),
2);
}
}
for (int i = 0; i < results[batchid].num_joints; i++) {
if (results[batchid].keypoints[edge[i][0] * 3] > threshold &&
results[batchid].keypoints[edge[i][1] * 3] > threshold) {
int x_start = int(results[batchid].keypoints[edge[i][0] * 3 + 1]);
int y_start = int(results[batchid].keypoints[edge[i][0] * 3 + 2]);
int x_end = int(results[batchid].keypoints[edge[i][1] * 3 + 1]);
int y_end = int(results[batchid].keypoints[edge[i][1] * 3 + 2]);
cv::line(vis_img,
cv::Point2d(x_start, y_start),
cv::Point2d(x_end, y_end),
colormap[i],
1);
}
}
}
return vis_img;
}
void KeyPointDetector::Preprocess(const cv::Mat& ori_im) {
// Clone the image : keep the original mat for postprocess
cv::Mat im = ori_im.clone();
cv::cvtColor(im, im, cv::COLOR_BGR2RGB);
preprocessor_.Run(&im, &inputs_);
}
void KeyPointDetector::Postprocess(std::vector<float>& output,
std::vector<int64_t>& output_shape,
std::vector<int64_t>& idxout,
std::vector<int64_t>& idx_shape,
std::vector<KeyPointResult>* result,
std::vector<std::vector<float>>& center_bs,
std::vector<std::vector<float>>& scale_bs) {
std::vector<float> preds(output_shape[1] * 3, 0);
for (int batchid = 0; batchid < output_shape[0]; batchid++) {
get_final_preds(output,
output_shape,
idxout,
idx_shape,
center_bs[batchid],
scale_bs[batchid],
preds,
batchid,
this->use_dark());
KeyPointResult result_item;
result_item.num_joints = output_shape[1];
result_item.keypoints.clear();
for (int i = 0; i < output_shape[1]; i++) {
result_item.keypoints.emplace_back(preds[i * 3]);
result_item.keypoints.emplace_back(preds[i * 3 + 1]);
result_item.keypoints.emplace_back(preds[i * 3 + 2]);
}
result->push_back(result_item);
}
}
void KeyPointDetector::Predict(const std::vector<cv::Mat> imgs,
std::vector<std::vector<float>>& center_bs,
std::vector<std::vector<float>>& scale_bs,
const int warmup,
const int repeats,
std::vector<KeyPointResult>* result,
std::vector<double>* times) {
auto preprocess_start = std::chrono::steady_clock::now();
int batch_size = imgs.size();
// in_data_batch
std::vector<float> in_data_all;
// Preprocess image
for (int bs_idx = 0; bs_idx < batch_size; bs_idx++) {
cv::Mat im = imgs.at(bs_idx);
Preprocess(im);
// TODO: reduce cost time
in_data_all.insert(
in_data_all.end(), inputs_.im_data_.begin(), inputs_.im_data_.end());
}
// Prepare input tensor
auto input_names = predictor_->GetInputNames();
for (const auto& tensor_name : input_names) {
auto in_tensor = predictor_->GetInputByName(tensor_name);
if (tensor_name == "image") {
int rh = inputs_.in_net_shape_[0];
int rw = inputs_.in_net_shape_[1];
in_tensor->Resize({batch_size, 3, rh, rw});
auto* inptr = in_tensor->mutable_data<float>();
std::copy_n(in_data_all.data(), in_data_all.size(), inptr);
}
}
auto preprocess_end = std::chrono::steady_clock::now();
std::vector<int64_t> output_shape, idx_shape;
// Run predictor
// warmup
for (int i = 0; i < warmup; i++) {
predictor_->Run();
// Get output tensor
auto output_names = predictor_->GetOutputNames();
auto out_tensor = predictor_->GetTensor(output_names[0]);
auto idx_tensor = predictor_->GetTensor(output_names[1]);
}
auto inference_start = std::chrono::steady_clock::now();
for (int i = 0; i < repeats; i++) {
predictor_->Run();
// Get output tensor
auto output_names = predictor_->GetOutputNames();
auto out_tensor = predictor_->GetTensor(output_names[0]);
output_shape = out_tensor->shape();
// Calculate output length
int output_size = 1;
for (int j = 0; j < output_shape.size(); ++j) {
output_size *= output_shape[j];
}
if (output_size < 6) {
std::cerr << "[WARNING] No object detected." << std::endl;
}
output_data_.resize(output_size);
std::copy_n(
out_tensor->mutable_data<float>(), output_size, output_data_.data());
auto idx_tensor = predictor_->GetTensor(output_names[1]);
idx_shape = idx_tensor->shape();
// Calculate output length
output_size = 1;
for (int j = 0; j < idx_shape.size(); ++j) {
output_size *= idx_shape[j];
}
idx_data_.resize(output_size);
std::copy_n(
idx_tensor->mutable_data<int64_t>(), output_size, idx_data_.data());
}
auto inference_end = std::chrono::steady_clock::now();
auto postprocess_start = std::chrono::steady_clock::now();
// Postprocessing result
Postprocess(output_data_,
output_shape,
idx_data_,
idx_shape,
result,
center_bs,
scale_bs);
auto postprocess_end = std::chrono::steady_clock::now();
std::chrono::duration<float> preprocess_diff =
preprocess_end - preprocess_start;
times->push_back(double(preprocess_diff.count() * 1000));
std::chrono::duration<float> inference_diff = inference_end - inference_start;
times->push_back(double(inference_diff.count() / repeats * 1000));
std::chrono::duration<float> postprocess_diff =
postprocess_end - postprocess_start;
times->push_back(double(postprocess_diff.count() * 1000));
}
} // namespace PaddleDetection

View File

@@ -0,0 +1,231 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "include/keypoint_postprocess.h"
#define PI 3.1415926535
#define HALF_CIRCLE_DEGREE 180
cv::Point2f get_3rd_point(cv::Point2f& a, cv::Point2f& b) {
cv::Point2f direct{a.x - b.x, a.y - b.y};
return cv::Point2f(a.x - direct.y, a.y + direct.x);
}
std::vector<float> get_dir(float src_point_x,
float src_point_y,
float rot_rad) {
float sn = sin(rot_rad);
float cs = cos(rot_rad);
std::vector<float> src_result{0.0, 0.0};
src_result[0] = src_point_x * cs - src_point_y * sn;
src_result[1] = src_point_x * sn + src_point_y * cs;
return src_result;
}
void affine_tranform(
float pt_x, float pt_y, cv::Mat& trans, std::vector<float>& preds, int p) {
double new1[3] = {pt_x, pt_y, 1.0};
cv::Mat new_pt(3, 1, trans.type(), new1);
cv::Mat w = trans * new_pt;
preds[p * 3 + 1] = static_cast<float>(w.at<double>(0, 0));
preds[p * 3 + 2] = static_cast<float>(w.at<double>(1, 0));
}
void get_affine_transform(std::vector<float>& center,
std::vector<float>& scale,
float rot,
std::vector<int>& output_size,
cv::Mat& trans,
int inv) {
float src_w = scale[0];
float dst_w = static_cast<float>(output_size[0]);
float dst_h = static_cast<float>(output_size[1]);
float rot_rad = rot * PI / HALF_CIRCLE_DEGREE;
std::vector<float> src_dir = get_dir(-0.5 * src_w, 0, rot_rad);
std::vector<float> dst_dir{static_cast<float>(-0.5) * dst_w, 0.0};
cv::Point2f srcPoint2f[3], dstPoint2f[3];
srcPoint2f[0] = cv::Point2f(center[0], center[1]);
srcPoint2f[1] = cv::Point2f(center[0] + src_dir[0], center[1] + src_dir[1]);
srcPoint2f[2] = get_3rd_point(srcPoint2f[0], srcPoint2f[1]);
dstPoint2f[0] = cv::Point2f(dst_w * 0.5, dst_h * 0.5);
dstPoint2f[1] =
cv::Point2f(dst_w * 0.5 + dst_dir[0], dst_h * 0.5 + dst_dir[1]);
dstPoint2f[2] = get_3rd_point(dstPoint2f[0], dstPoint2f[1]);
if (inv == 0) {
trans = cv::getAffineTransform(srcPoint2f, dstPoint2f);
} else {
trans = cv::getAffineTransform(dstPoint2f, srcPoint2f);
}
}
void transform_preds(std::vector<float>& coords,
std::vector<float>& center,
std::vector<float>& scale,
std::vector<int>& output_size,
std::vector<int64_t>& dim,
std::vector<float>& target_coords,
bool affine=false) {
if (affine) {
cv::Mat trans(2, 3, CV_64FC1);
get_affine_transform(center, scale, 0, output_size, trans, 1);
for (int p = 0; p < dim[1]; ++p) {
affine_tranform(
coords[p * 2], coords[p * 2 + 1], trans, target_coords, p);
}
} else {
float heat_w = static_cast<float>(output_size[0]);
float heat_h = static_cast<float>(output_size[1]);
float x_scale = scale[0] / heat_w;
float y_scale = scale[1] / heat_h;
float offset_x = center[0] - scale[0] / 2.;
float offset_y = center[1] - scale[1] / 2.;
for (int i = 0; i < dim[1]; i++) {
target_coords[i * 3 + 1] = x_scale * coords[i * 2] + offset_x;
target_coords[i * 3 + 2] = y_scale * coords[i * 2 + 1] + offset_y;
}
}
}
// only for batchsize == 1
void get_max_preds(std::vector<float>& heatmap,
std::vector<int>& dim,
std::vector<float>& preds,
std::vector<float>& maxvals,
int batchid,
int joint_idx) {
int num_joints = dim[1];
int width = dim[3];
std::vector<int> idx;
idx.resize(num_joints * 2);
for (int j = 0; j < dim[1]; j++) {
float* index = &(
heatmap[batchid * num_joints * dim[2] * dim[3] + j * dim[2] * dim[3]]);
float* end = index + dim[2] * dim[3];
float* max_dis = std::max_element(index, end);
auto max_id = std::distance(index, max_dis);
maxvals[j] = *max_dis;
if (*max_dis > 0) {
preds[j * 2] = static_cast<float>(max_id % width);
preds[j * 2 + 1] = static_cast<float>(max_id / width);
}
}
}
void dark_parse(std::vector<float>& heatmap,
std::vector<int64_t>& dim,
std::vector<float>& coords,
int px,
int py,
int index,
int ch){
/*DARK postpocessing, Zhang et al. Distribution-Aware Coordinate
Representation for Human Pose Estimation (CVPR 2020).
1) offset = - hassian.inv() * derivative
2) dx = (heatmap[x+1] - heatmap[x-1])/2.
3) dxx = (dx[x+1] - dx[x-1])/2.
4) derivative = Mat([dx, dy])
5) hassian = Mat([[dxx, dxy], [dxy, dyy]])
*/
std::vector<float>::const_iterator first1 = heatmap.begin() + index;
std::vector<float>::const_iterator last1 = heatmap.begin() + index + dim[2] * dim[3];
std::vector<float> heatmap_ch(first1, last1);
cv::Mat heatmap_mat = cv::Mat(heatmap_ch).reshape(0,dim[2]);
heatmap_mat.convertTo(heatmap_mat, CV_32FC1);
cv::GaussianBlur(heatmap_mat, heatmap_mat, cv::Size(3, 3), 0, 0);
heatmap_mat = heatmap_mat.reshape(1,1);
heatmap_ch = std::vector<float>(heatmap_mat.reshape(1,1));
float epsilon = 1e-10;
//sample heatmap to get values in around target location
float xy = log(fmax(heatmap_ch[py * dim[3] + px], epsilon));
float xr = log(fmax(heatmap_ch[py * dim[3] + px + 1], epsilon));
float xl = log(fmax(heatmap_ch[py * dim[3] + px - 1], epsilon));
float xr2 = log(fmax(heatmap_ch[py * dim[3] + px + 2], epsilon));
float xl2 = log(fmax(heatmap_ch[py * dim[3] + px - 2], epsilon));
float yu = log(fmax(heatmap_ch[(py + 1) * dim[3] + px], epsilon));
float yd = log(fmax(heatmap_ch[(py - 1) * dim[3] + px], epsilon));
float yu2 = log(fmax(heatmap_ch[(py + 2) * dim[3] + px], epsilon));
float yd2 = log(fmax(heatmap_ch[(py - 2) * dim[3] + px], epsilon));
float xryu = log(fmax(heatmap_ch[(py + 1) * dim[3] + px + 1], epsilon));
float xryd = log(fmax(heatmap_ch[(py - 1) * dim[3] + px + 1], epsilon));
float xlyu = log(fmax(heatmap_ch[(py + 1) * dim[3] + px - 1], epsilon));
float xlyd = log(fmax(heatmap_ch[(py - 1) * dim[3] + px - 1], epsilon));
//compute dx/dy and dxx/dyy with sampled values
float dx = 0.5 * (xr - xl);
float dy = 0.5 * (yu - yd);
float dxx = 0.25 * (xr2 - 2*xy + xl2);
float dxy = 0.25 * (xryu - xryd - xlyu + xlyd);
float dyy = 0.25 * (yu2 - 2*xy + yd2);
//finally get offset by derivative and hassian, which combined by dx/dy and dxx/dyy
if(dxx * dyy - dxy*dxy != 0){
float M[2][2] = {dxx, dxy, dxy, dyy};
float D[2] = {dx, dy};
cv::Mat hassian(2,2,CV_32F,M);
cv::Mat derivative(2,1,CV_32F,D);
cv::Mat offset = - hassian.inv() * derivative;
coords[ch * 2] += offset.at<float>(0,0);
coords[ch * 2 + 1] += offset.at<float>(1,0);
}
}
void get_final_preds(std::vector<float>& heatmap,
std::vector<int64_t>& dim,
std::vector<int64_t>& idxout,
std::vector<int64_t>& idxdim,
std::vector<float>& center,
std::vector<float> scale,
std::vector<float>& preds,
int batchid,
bool DARK) {
std::vector<float> coords;
coords.resize(dim[1] * 2);
int heatmap_height = dim[2];
int heatmap_width = dim[3];
for (int j = 0; j < dim[1]; ++j) {
int index = (batchid * dim[1] + j) * dim[2] * dim[3];
int idx = idxout[batchid * dim[1] + j];
preds[j * 3] = heatmap[index + idx];
coords[j * 2] = idx % heatmap_width;
coords[j * 2 + 1] = idx / heatmap_width;
int px = int(coords[j * 2] + 0.5);
int py = int(coords[j * 2 + 1] + 0.5);
if(DARK && px > 1 && px < heatmap_width - 2){
dark_parse(heatmap, dim, coords, px, py, index, j);
}
else{
if (px > 0 && px < heatmap_width - 1) {
float diff_x = heatmap[index + py * dim[3] + px + 1] -
heatmap[index + py * dim[3] + px - 1];
coords[j * 2] += diff_x > 0 ? 1 : -1 * 0.25;
}
if (py > 0 && py < heatmap_height - 1) {
float diff_y = heatmap[index + (py + 1) * dim[3] + px] -
heatmap[index + (py - 1) * dim[3] + px];
coords[j * 2 + 1] += diff_y > 0 ? 1 : -1 * 0.25;
}
}
}
std::vector<int> img_size{heatmap_width, heatmap_height};
transform_preds(coords, center, scale, img_size, dim, preds);
}

View File

@@ -0,0 +1,388 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <math.h>
#include <stdarg.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <algorithm>
#include <iostream>
#include <numeric>
#include <string>
#include <vector>
#include "include/config_parser.h"
#include "include/keypoint_detector.h"
#include "include/object_detector.h"
#include "include/preprocess_op.h"
#include "json/json.h"
Json::Value RT_Config;
void PrintBenchmarkLog(std::vector<double> det_time, int img_num) {
std::cout << "----------------------- Config info -----------------------"
<< std::endl;
std::cout << "num_threads: " << RT_Config["cpu_threads"].as<int>()
<< std::endl;
std::cout << "----------------------- Data info -----------------------"
<< std::endl;
std::cout << "batch_size_det: " << RT_Config["batch_size_det"].as<int>()
<< std::endl;
std::cout << "----------------------- Model info -----------------------"
<< std::endl;
RT_Config["model_dir_det"].as<std::string>().erase(
RT_Config["model_dir_det"].as<std::string>().find_last_not_of("/") + 1);
std::cout << "detection model_name: "
<< RT_Config["model_dir_det"].as<std::string>() << std::endl;
std::cout << "----------------------- Perf info ------------------------"
<< std::endl;
std::cout << "Total number of predicted data: " << img_num
<< " and total time spent(ms): "
<< std::accumulate(det_time.begin(), det_time.end(), 0.)
<< std::endl;
img_num = std::max(1, img_num);
std::cout << "preproce_time(ms): " << det_time[0] / img_num
<< ", inference_time(ms): " << det_time[1] / img_num
<< ", postprocess_time(ms): " << det_time[2] / img_num << std::endl;
}
void PrintKptsBenchmarkLog(std::vector<double> det_time, int img_num) {
std::cout << "----------------------- Data info -----------------------"
<< std::endl;
std::cout << "batch_size_keypoint: "
<< RT_Config["batch_size_keypoint"].as<int>() << std::endl;
std::cout << "----------------------- Model info -----------------------"
<< std::endl;
RT_Config["model_dir_keypoint"].as<std::string>().erase(
RT_Config["model_dir_keypoint"].as<std::string>().find_last_not_of("/") +
1);
std::cout << "keypoint model_name: "
<< RT_Config["model_dir_keypoint"].as<std::string>() << std::endl;
std::cout << "----------------------- Perf info ------------------------"
<< std::endl;
std::cout << "Total number of predicted data: " << img_num
<< " and total time spent(ms): "
<< std::accumulate(det_time.begin(), det_time.end(), 0.)
<< std::endl;
img_num = std::max(1, img_num);
std::cout << "Average time cost per person:" << std::endl
<< "preproce_time(ms): " << det_time[0] / img_num
<< ", inference_time(ms): " << det_time[1] / img_num
<< ", postprocess_time(ms): " << det_time[2] / img_num << std::endl;
}
void PrintTotalIimeLog(double det_time,
double keypoint_time,
double crop_time) {
std::cout << "----------------------- Time info ------------------------"
<< std::endl;
std::cout << "Total Pipeline time(ms) per image: "
<< det_time + keypoint_time + crop_time << std::endl;
std::cout << "Average det time(ms) per image: " << det_time
<< ", average keypoint time(ms) per image: " << keypoint_time
<< ", average crop time(ms) per image: " << crop_time << std::endl;
}
static std::string DirName(const std::string& filepath) {
auto pos = filepath.rfind(OS_PATH_SEP);
if (pos == std::string::npos) {
return "";
}
return filepath.substr(0, pos);
}
static bool PathExists(const std::string& path) {
struct stat buffer;
return (stat(path.c_str(), &buffer) == 0);
}
static void MkDir(const std::string& path) {
if (PathExists(path)) return;
int ret = 0;
ret = mkdir(path.c_str(), 0755);
if (ret != 0) {
std::string path_error(path);
path_error += " mkdir failed!";
throw std::runtime_error(path_error);
}
}
static void MkDirs(const std::string& path) {
if (path.empty()) return;
if (PathExists(path)) return;
MkDirs(DirName(path));
MkDir(path);
}
void PredictImage(const std::vector<std::string> all_img_paths,
const int batch_size_det,
const double threshold_det,
const bool run_benchmark,
PaddleDetection::ObjectDetector* det,
PaddleDetection::KeyPointDetector* keypoint,
const std::string& output_dir = "output") {
std::vector<double> det_t = {0, 0, 0};
int steps = ceil(static_cast<float>(all_img_paths.size()) / batch_size_det);
int kpts_imgs = 0;
std::vector<double> keypoint_t = {0, 0, 0};
double midtimecost = 0;
for (int idx = 0; idx < steps; idx++) {
std::vector<cv::Mat> batch_imgs;
int left_image_cnt = all_img_paths.size() - idx * batch_size_det;
if (left_image_cnt > batch_size_det) {
left_image_cnt = batch_size_det;
}
for (int bs = 0; bs < left_image_cnt; bs++) {
std::string image_file_path = all_img_paths.at(idx * batch_size_det + bs);
cv::Mat im = cv::imread(image_file_path, 1);
batch_imgs.insert(batch_imgs.end(), im);
}
// Store all detected result
std::vector<PaddleDetection::ObjectResult> result;
std::vector<int> bbox_num;
std::vector<double> det_times;
// Store keypoint results
std::vector<PaddleDetection::KeyPointResult> result_kpts;
std::vector<cv::Mat> imgs_kpts;
std::vector<std::vector<float>> center_bs;
std::vector<std::vector<float>> scale_bs;
std::vector<int> colormap_kpts = PaddleDetection::GenerateColorMap(20);
bool is_rbox = false;
if (run_benchmark) {
det->Predict(
batch_imgs, threshold_det, 50, 50, &result, &bbox_num, &det_times);
} else {
det->Predict(
batch_imgs, threshold_det, 0, 1, &result, &bbox_num, &det_times);
}
// get labels and colormap
auto labels = det->GetLabelList();
auto colormap = PaddleDetection::GenerateColorMap(labels.size());
int item_start_idx = 0;
for (int i = 0; i < left_image_cnt; i++) {
cv::Mat im = batch_imgs[i];
std::vector<PaddleDetection::ObjectResult> im_result;
int detect_num = 0;
for (int j = 0; j < bbox_num[i]; j++) {
PaddleDetection::ObjectResult item = result[item_start_idx + j];
if (item.confidence < threshold_det || item.class_id == -1) {
continue;
}
detect_num += 1;
im_result.push_back(item);
if (item.rect.size() > 6) {
is_rbox = true;
printf("class=%d confidence=%.4f rect=[%d %d %d %d %d %d %d %d]\n",
item.class_id,
item.confidence,
item.rect[0],
item.rect[1],
item.rect[2],
item.rect[3],
item.rect[4],
item.rect[5],
item.rect[6],
item.rect[7]);
} else {
printf("class=%d confidence=%.4f rect=[%d %d %d %d]\n",
item.class_id,
item.confidence,
item.rect[0],
item.rect[1],
item.rect[2],
item.rect[3]);
}
}
std::cout << all_img_paths.at(idx * batch_size_det + i)
<< " The number of detected box: " << detect_num << std::endl;
item_start_idx = item_start_idx + bbox_num[i];
std::vector<int> compression_params;
compression_params.push_back(cv::IMWRITE_JPEG_QUALITY);
compression_params.push_back(95);
std::string output_path(output_dir);
if (output_dir.rfind(OS_PATH_SEP) != output_dir.size() - 1) {
output_path += OS_PATH_SEP;
}
std::string image_file_path = all_img_paths.at(idx * batch_size_det + i);
if (keypoint) {
int imsize = im_result.size();
for (int i = 0; i < imsize; i++) {
auto keypoint_start_time = std::chrono::steady_clock::now();
auto item = im_result[i];
cv::Mat crop_img;
std::vector<double> keypoint_times;
std::vector<int> rect = {
item.rect[0], item.rect[1], item.rect[2], item.rect[3]};
std::vector<float> center;
std::vector<float> scale;
if (item.class_id == 0) {
PaddleDetection::CropImg(im, crop_img, rect, center, scale);
center_bs.emplace_back(center);
scale_bs.emplace_back(scale);
imgs_kpts.emplace_back(crop_img);
kpts_imgs += 1;
}
auto keypoint_crop_time = std::chrono::steady_clock::now();
std::chrono::duration<float> midtimediff =
keypoint_crop_time - keypoint_start_time;
midtimecost += static_cast<double>(midtimediff.count() * 1000);
if (imgs_kpts.size() == RT_Config["batch_size_keypoint"].as<int>() ||
((i == imsize - 1) && !imgs_kpts.empty())) {
if (run_benchmark) {
keypoint->Predict(imgs_kpts,
center_bs,
scale_bs,
10,
10,
&result_kpts,
&keypoint_times);
} else {
keypoint->Predict(imgs_kpts,
center_bs,
scale_bs,
0,
1,
&result_kpts,
&keypoint_times);
}
imgs_kpts.clear();
center_bs.clear();
scale_bs.clear();
keypoint_t[0] += keypoint_times[0];
keypoint_t[1] += keypoint_times[1];
keypoint_t[2] += keypoint_times[2];
}
}
std::string kpts_savepath =
output_path + "keypoint_" +
image_file_path.substr(image_file_path.find_last_of('/') + 1);
cv::Mat kpts_vis_img = VisualizeKptsResult(
im, result_kpts, colormap_kpts, keypoint->get_threshold());
cv::imwrite(kpts_savepath, kpts_vis_img, compression_params);
printf("Visualized output saved as %s\n", kpts_savepath.c_str());
} else {
// Visualization result
cv::Mat vis_img = PaddleDetection::VisualizeResult(
im, im_result, labels, colormap, is_rbox);
std::string det_savepath =
output_path + "result_" +
image_file_path.substr(image_file_path.find_last_of('/') + 1);
cv::imwrite(det_savepath, vis_img, compression_params);
printf("Visualized output saved as %s\n", det_savepath.c_str());
}
}
det_t[0] += det_times[0];
det_t[1] += det_times[1];
det_t[2] += det_times[2];
}
PrintBenchmarkLog(det_t, all_img_paths.size());
if (keypoint) {
PrintKptsBenchmarkLog(keypoint_t, kpts_imgs);
PrintTotalIimeLog(
(det_t[0] + det_t[1] + det_t[2]) / all_img_paths.size(),
(keypoint_t[0] + keypoint_t[1] + keypoint_t[2]) / all_img_paths.size(),
midtimecost / all_img_paths.size());
}
}
int main(int argc, char** argv) {
std::cout << "Usage: " << argv[0] << " [config_path] [image_dir](option)\n";
if (argc < 2) {
std::cout << "Usage: ./main det_runtime_config.json" << std::endl;
return -1;
}
std::string config_path = argv[1];
std::string img_path = "";
if (argc >= 3) {
img_path = argv[2];
}
// Parsing command-line
PaddleDetection::load_jsonf(config_path, RT_Config);
if (RT_Config["model_dir_det"].as<std::string>().empty()) {
std::cout << "Please set [model_det_dir] in " << config_path << std::endl;
return -1;
}
if (RT_Config["image_file"].as<std::string>().empty() &&
RT_Config["image_dir"].as<std::string>().empty() && img_path.empty()) {
std::cout << "Please set [image_file] or [image_dir] in " << config_path
<< " Or use command: <" << argv[0] << " [image_dir]>"
<< std::endl;
return -1;
}
if (!img_path.empty()) {
std::cout << "Use image_dir in command line overide the path in config file"
<< std::endl;
RT_Config["image_dir"] = img_path;
RT_Config["image_file"] = "";
}
// Load model and create a object detector
PaddleDetection::ObjectDetector det(
RT_Config["model_dir_det"].as<std::string>(),
RT_Config["cpu_threads"].as<int>(),
RT_Config["batch_size_det"].as<int>());
PaddleDetection::KeyPointDetector* keypoint = nullptr;
if (!RT_Config["model_dir_keypoint"].as<std::string>().empty()) {
keypoint = new PaddleDetection::KeyPointDetector(
RT_Config["model_dir_keypoint"].as<std::string>(),
RT_Config["cpu_threads"].as<int>(),
RT_Config["batch_size_keypoint"].as<int>(),
RT_Config["use_dark_decode"].as<bool>());
RT_Config["batch_size_det"] = 1;
printf(
"batchsize of detection forced to be 1 while keypoint model is not "
"empty()");
}
// Do inference on input image
if (!RT_Config["image_file"].as<std::string>().empty() ||
!RT_Config["image_dir"].as<std::string>().empty()) {
if (!PathExists(RT_Config["output_dir"].as<std::string>())) {
MkDirs(RT_Config["output_dir"].as<std::string>());
}
std::vector<std::string> all_img_paths;
std::vector<cv::String> cv_all_img_paths;
if (!RT_Config["image_file"].as<std::string>().empty()) {
all_img_paths.push_back(RT_Config["image_file"].as<std::string>());
if (RT_Config["batch_size_det"].as<int>() > 1) {
std::cout << "batch_size_det should be 1, when set `image_file`."
<< std::endl;
return -1;
}
} else {
cv::glob(RT_Config["image_dir"].as<std::string>(), cv_all_img_paths);
for (const auto& img_path : cv_all_img_paths) {
all_img_paths.push_back(img_path);
}
}
PredictImage(all_img_paths,
RT_Config["batch_size_det"].as<int>(),
RT_Config["threshold_det"].as<float>(),
RT_Config["run_benchmark"].as<bool>(),
&det,
keypoint,
RT_Config["output_dir"].as<std::string>());
}
delete keypoint;
keypoint = nullptr;
return 0;
}

View File

@@ -0,0 +1,329 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <sstream>
// for setprecision
#include <chrono>
#include <iomanip>
#include "include/object_detector.h"
namespace PaddleDetection {
// Load Model and create model predictor
void ObjectDetector::LoadModel(std::string model_file, int num_theads) {
MobileConfig config;
config.set_threads(num_theads);
config.set_model_from_file(model_file + "/model.nb");
config.set_power_mode(LITE_POWER_HIGH);
predictor_ = CreatePaddlePredictor<MobileConfig>(config);
}
// Visualiztion MaskDetector results
cv::Mat VisualizeResult(const cv::Mat& img,
const std::vector<PaddleDetection::ObjectResult>& results,
const std::vector<std::string>& lables,
const std::vector<int>& colormap,
const bool is_rbox = false) {
cv::Mat vis_img = img.clone();
for (int i = 0; i < results.size(); ++i) {
// Configure color and text size
std::ostringstream oss;
oss << std::setiosflags(std::ios::fixed) << std::setprecision(4);
oss << lables[results[i].class_id] << " ";
oss << results[i].confidence;
std::string text = oss.str();
int c1 = colormap[3 * results[i].class_id + 0];
int c2 = colormap[3 * results[i].class_id + 1];
int c3 = colormap[3 * results[i].class_id + 2];
cv::Scalar roi_color = cv::Scalar(c1, c2, c3);
int font_face = cv::FONT_HERSHEY_COMPLEX_SMALL;
double font_scale = 0.5f;
float thickness = 0.5;
cv::Size text_size =
cv::getTextSize(text, font_face, font_scale, thickness, nullptr);
cv::Point origin;
if (is_rbox) {
// Draw object, text, and background
for (int k = 0; k < 4; k++) {
cv::Point pt1 = cv::Point(results[i].rect[(k * 2) % 8],
results[i].rect[(k * 2 + 1) % 8]);
cv::Point pt2 = cv::Point(results[i].rect[(k * 2 + 2) % 8],
results[i].rect[(k * 2 + 3) % 8]);
cv::line(vis_img, pt1, pt2, roi_color, 2);
}
} else {
int w = results[i].rect[2] - results[i].rect[0];
int h = results[i].rect[3] - results[i].rect[1];
cv::Rect roi = cv::Rect(results[i].rect[0], results[i].rect[1], w, h);
// Draw roi object, text, and background
cv::rectangle(vis_img, roi, roi_color, 2);
}
origin.x = results[i].rect[0];
origin.y = results[i].rect[1];
// Configure text background
cv::Rect text_back = cv::Rect(results[i].rect[0],
results[i].rect[1] - text_size.height,
text_size.width,
text_size.height);
// Draw text, and background
cv::rectangle(vis_img, text_back, roi_color, -1);
cv::putText(vis_img,
text,
origin,
font_face,
font_scale,
cv::Scalar(255, 255, 255),
thickness);
}
return vis_img;
}
void ObjectDetector::Preprocess(const cv::Mat& ori_im) {
// Clone the image : keep the original mat for postprocess
cv::Mat im = ori_im.clone();
cv::cvtColor(im, im, cv::COLOR_BGR2RGB);
preprocessor_.Run(&im, &inputs_);
}
void ObjectDetector::Postprocess(const std::vector<cv::Mat> mats,
std::vector<PaddleDetection::ObjectResult>* result,
std::vector<int> bbox_num,
bool is_rbox = false) {
result->clear();
int start_idx = 0;
for (int im_id = 0; im_id < mats.size(); im_id++) {
cv::Mat raw_mat = mats[im_id];
int rh = 1;
int rw = 1;
if (config_.arch_ == "Face") {
rh = raw_mat.rows;
rw = raw_mat.cols;
}
for (int j = start_idx; j < start_idx + bbox_num[im_id]; j++) {
if (is_rbox) {
// Class id
int class_id = static_cast<int>(round(output_data_[0 + j * 10]));
// Confidence score
float score = output_data_[1 + j * 10];
int x1 = (output_data_[2 + j * 10] * rw);
int y1 = (output_data_[3 + j * 10] * rh);
int x2 = (output_data_[4 + j * 10] * rw);
int y2 = (output_data_[5 + j * 10] * rh);
int x3 = (output_data_[6 + j * 10] * rw);
int y3 = (output_data_[7 + j * 10] * rh);
int x4 = (output_data_[8 + j * 10] * rw);
int y4 = (output_data_[9 + j * 10] * rh);
PaddleDetection::ObjectResult result_item;
result_item.rect = {x1, y1, x2, y2, x3, y3, x4, y4};
result_item.class_id = class_id;
result_item.confidence = score;
result->push_back(result_item);
} else {
// Class id
int class_id = static_cast<int>(round(output_data_[0 + j * 6]));
// Confidence score
float score = output_data_[1 + j * 6];
int xmin = (output_data_[2 + j * 6] * rw);
int ymin = (output_data_[3 + j * 6] * rh);
int xmax = (output_data_[4 + j * 6] * rw);
int ymax = (output_data_[5 + j * 6] * rh);
int wd = xmax - xmin;
int hd = ymax - ymin;
PaddleDetection::ObjectResult result_item;
result_item.rect = {xmin, ymin, xmax, ymax};
result_item.class_id = class_id;
result_item.confidence = score;
result->push_back(result_item);
}
}
start_idx += bbox_num[im_id];
}
}
void ObjectDetector::Predict(const std::vector<cv::Mat>& imgs,
const double threshold,
const int warmup,
const int repeats,
std::vector<PaddleDetection::ObjectResult>* result,
std::vector<int>* bbox_num,
std::vector<double>* times) {
auto preprocess_start = std::chrono::steady_clock::now();
int batch_size = imgs.size();
// in_data_batch
std::vector<float> in_data_all;
std::vector<float> im_shape_all(batch_size * 2);
std::vector<float> scale_factor_all(batch_size * 2);
// Preprocess image
for (int bs_idx = 0; bs_idx < batch_size; bs_idx++) {
cv::Mat im = imgs.at(bs_idx);
Preprocess(im);
im_shape_all[bs_idx * 2] = inputs_.im_shape_[0];
im_shape_all[bs_idx * 2 + 1] = inputs_.im_shape_[1];
scale_factor_all[bs_idx * 2] = inputs_.scale_factor_[0];
scale_factor_all[bs_idx * 2 + 1] = inputs_.scale_factor_[1];
// TODO: reduce cost time
in_data_all.insert(
in_data_all.end(), inputs_.im_data_.begin(), inputs_.im_data_.end());
}
auto preprocess_end = std::chrono::steady_clock::now();
std::vector<const float *> output_data_list_;
// Prepare input tensor
auto input_names = predictor_->GetInputNames();
for (const auto& tensor_name : input_names) {
auto in_tensor = predictor_->GetInputByName(tensor_name);
if (tensor_name == "image") {
int rh = inputs_.in_net_shape_[0];
int rw = inputs_.in_net_shape_[1];
in_tensor->Resize({batch_size, 3, rh, rw});
auto* inptr = in_tensor->mutable_data<float>();
std::copy_n(in_data_all.data(), in_data_all.size(), inptr);
} else if (tensor_name == "im_shape") {
in_tensor->Resize({batch_size, 2});
auto* inptr = in_tensor->mutable_data<float>();
std::copy_n(im_shape_all.data(), im_shape_all.size(), inptr);
} else if (tensor_name == "scale_factor") {
in_tensor->Resize({batch_size, 2});
auto* inptr = in_tensor->mutable_data<float>();
std::copy_n(scale_factor_all.data(), scale_factor_all.size(), inptr);
}
}
// Run predictor
// warmup
for (int i = 0; i < warmup; i++) {
predictor_->Run();
// Get output tensor
auto output_names = predictor_->GetOutputNames();
if (config_.arch_ == "PicoDet") {
for (int j = 0; j < output_names.size(); j++) {
auto output_tensor = predictor_->GetTensor(output_names[j]);
const float* outptr = output_tensor->data<float>();
std::vector<int64_t> output_shape = output_tensor->shape();
output_data_list_.push_back(outptr);
}
} else {
auto out_tensor = predictor_->GetTensor(output_names[0]);
auto out_bbox_num = predictor_->GetTensor(output_names[1]);
}
}
bool is_rbox = false;
auto inference_start = std::chrono::steady_clock::now();
for (int i = 0; i < repeats; i++) {
predictor_->Run();
}
auto inference_end = std::chrono::steady_clock::now();
auto postprocess_start = std::chrono::steady_clock::now();
// Get output tensor
output_data_list_.clear();
int num_class = 80;
int reg_max = 7;
auto output_names = predictor_->GetOutputNames();
// TODO: Unified model output.
if (config_.arch_ == "PicoDet") {
for (int i = 0; i < output_names.size(); i++) {
auto output_tensor = predictor_->GetTensor(output_names[i]);
const float* outptr = output_tensor->data<float>();
std::vector<int64_t> output_shape = output_tensor->shape();
if (i == 0) {
num_class = output_shape[2];
}
if (i == config_.fpn_stride_.size()) {
reg_max = output_shape[2] / 4 - 1;
}
output_data_list_.push_back(outptr);
}
} else {
auto output_tensor = predictor_->GetTensor(output_names[0]);
auto output_shape = output_tensor->shape();
auto out_bbox_num = predictor_->GetTensor(output_names[1]);
auto out_bbox_num_shape = out_bbox_num->shape();
// Calculate output length
int output_size = 1;
for (int j = 0; j < output_shape.size(); ++j) {
output_size *= output_shape[j];
}
is_rbox = output_shape[output_shape.size() - 1] % 10 == 0;
if (output_size < 6) {
std::cerr << "[WARNING] No object detected." << std::endl;
}
output_data_.resize(output_size);
std::copy_n(
output_tensor->mutable_data<float>(), output_size, output_data_.data());
int out_bbox_num_size = 1;
for (int j = 0; j < out_bbox_num_shape.size(); ++j) {
out_bbox_num_size *= out_bbox_num_shape[j];
}
out_bbox_num_data_.resize(out_bbox_num_size);
std::copy_n(out_bbox_num->mutable_data<int>(),
out_bbox_num_size,
out_bbox_num_data_.data());
}
// Postprocessing result
result->clear();
if (config_.arch_ == "PicoDet") {
PaddleDetection::PicoDetPostProcess(
result, output_data_list_, config_.fpn_stride_,
inputs_.im_shape_, inputs_.scale_factor_,
config_.nms_info_["score_threshold"].as<float>(),
config_.nms_info_["nms_threshold"].as<float>(), num_class, reg_max);
bbox_num->push_back(result->size());
} else {
Postprocess(imgs, result, out_bbox_num_data_, is_rbox);
bbox_num->clear();
for (int k = 0; k < out_bbox_num_data_.size(); k++) {
int tmp = out_bbox_num_data_[k];
bbox_num->push_back(tmp);
}
}
auto postprocess_end = std::chrono::steady_clock::now();
std::chrono::duration<float> preprocess_diff =
preprocess_end - preprocess_start;
times->push_back(double(preprocess_diff.count() * 1000));
std::chrono::duration<float> inference_diff = inference_end - inference_start;
times->push_back(double(inference_diff.count() / repeats * 1000));
std::chrono::duration<float> postprocess_diff =
postprocess_end - postprocess_start;
times->push_back(double(postprocess_diff.count() * 1000));
}
std::vector<int> GenerateColorMap(int num_class) {
auto colormap = std::vector<int>(3 * num_class, 0);
for (int i = 0; i < num_class; ++i) {
int j = 0;
int lab = i;
while (lab) {
colormap[i * 3] |= (((lab >> 0) & 1) << (7 - j));
colormap[i * 3 + 1] |= (((lab >> 1) & 1) << (7 - j));
colormap[i * 3 + 2] |= (((lab >> 2) & 1) << (7 - j));
++j;
lab >>= 3;
}
}
return colormap;
}
} // namespace PaddleDetection

View File

@@ -0,0 +1,128 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// The code is based on:
// https://github.com/RangiLyu/nanodet/blob/main/demo_mnn/nanodet_mnn.cpp
#include "include/picodet_postprocess.h"
namespace PaddleDetection {
float fast_exp(float x) {
union {
uint32_t i;
float f;
} v{};
v.i = (1 << 23) * (1.4426950409 * x + 126.93490512f);
return v.f;
}
template <typename _Tp>
int activation_function_softmax(const _Tp *src, _Tp *dst, int length) {
const _Tp alpha = *std::max_element(src, src + length);
_Tp denominator{0};
for (int i = 0; i < length; ++i) {
dst[i] = fast_exp(src[i] - alpha);
denominator += dst[i];
}
for (int i = 0; i < length; ++i) {
dst[i] /= denominator;
}
return 0;
}
// PicoDet decode
PaddleDetection::ObjectResult
disPred2Bbox(const float *&dfl_det, int label, float score, int x, int y,
int stride, std::vector<float> im_shape, int reg_max) {
float ct_x = (x + 0.5) * stride;
float ct_y = (y + 0.5) * stride;
std::vector<float> dis_pred;
dis_pred.resize(4);
for (int i = 0; i < 4; i++) {
float dis = 0;
float *dis_after_sm = new float[reg_max + 1];
activation_function_softmax(dfl_det + i * (reg_max + 1), dis_after_sm,
reg_max + 1);
for (int j = 0; j < reg_max + 1; j++) {
dis += j * dis_after_sm[j];
}
dis *= stride;
dis_pred[i] = dis;
delete[] dis_after_sm;
}
int xmin = (int)(std::max)(ct_x - dis_pred[0], .0f);
int ymin = (int)(std::max)(ct_y - dis_pred[1], .0f);
int xmax = (int)(std::min)(ct_x + dis_pred[2], (float)im_shape[0]);
int ymax = (int)(std::min)(ct_y + dis_pred[3], (float)im_shape[1]);
PaddleDetection::ObjectResult result_item;
result_item.rect = {xmin, ymin, xmax, ymax};
result_item.class_id = label;
result_item.confidence = score;
return result_item;
}
void PicoDetPostProcess(std::vector<PaddleDetection::ObjectResult> *results,
std::vector<const float *> outs,
std::vector<int> fpn_stride,
std::vector<float> im_shape,
std::vector<float> scale_factor, float score_threshold,
float nms_threshold, int num_class, int reg_max) {
std::vector<std::vector<PaddleDetection::ObjectResult>> bbox_results;
bbox_results.resize(num_class);
int in_h = im_shape[0], in_w = im_shape[1];
for (int i = 0; i < fpn_stride.size(); ++i) {
int feature_h = ceil((float)in_h / fpn_stride[i]);
int feature_w = ceil((float)in_w / fpn_stride[i]);
for (int idx = 0; idx < feature_h * feature_w; idx++) {
const float *scores = outs[i] + (idx * num_class);
int row = idx / feature_w;
int col = idx % feature_w;
float score = 0;
int cur_label = 0;
for (int label = 0; label < num_class; label++) {
if (scores[label] > score) {
score = scores[label];
cur_label = label;
}
}
if (score > score_threshold) {
const float *bbox_pred =
outs[i + fpn_stride.size()] + (idx * 4 * (reg_max + 1));
bbox_results[cur_label].push_back(
disPred2Bbox(bbox_pred, cur_label, score, col, row, fpn_stride[i],
im_shape, reg_max));
}
}
}
for (int i = 0; i < (int)bbox_results.size(); i++) {
PaddleDetection::nms(bbox_results[i], nms_threshold);
for (auto box : bbox_results[i]) {
box.rect[0] = box.rect[0] / scale_factor[1];
box.rect[2] = box.rect[2] / scale_factor[1];
box.rect[1] = box.rect[1] / scale_factor[0];
box.rect[3] = box.rect[3] / scale_factor[0];
results->push_back(box);
}
}
}
} // namespace PaddleDetection

View File

@@ -0,0 +1,185 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <string>
#include <thread>
#include <vector>
#include "include/preprocess_op.h"
namespace PaddleDetection {
void InitInfo::Run(cv::Mat* im, ImageBlob* data) {
data->im_shape_ = {static_cast<float>(im->rows),
static_cast<float>(im->cols)};
data->scale_factor_ = {1., 1.};
data->in_net_shape_ = {static_cast<float>(im->rows),
static_cast<float>(im->cols)};
}
void NormalizeImage::Run(cv::Mat* im, ImageBlob* data) {
double e = 1.0;
if (is_scale_) {
e *= 1./255.0;
}
(*im).convertTo(*im, CV_32FC3, e);
for (int h = 0; h < im->rows; h++) {
for (int w = 0; w < im->cols; w++) {
im->at<cv::Vec3f>(h, w)[0] =
(im->at<cv::Vec3f>(h, w)[0] - mean_[0]) / scale_[0];
im->at<cv::Vec3f>(h, w)[1] =
(im->at<cv::Vec3f>(h, w)[1] - mean_[1]) / scale_[1];
im->at<cv::Vec3f>(h, w)[2] =
(im->at<cv::Vec3f>(h, w)[2] - mean_[2]) / scale_[2];
}
}
}
void Permute::Run(cv::Mat* im, ImageBlob* data) {
(*im).convertTo(*im, CV_32FC3);
int rh = im->rows;
int rw = im->cols;
int rc = im->channels();
(data->im_data_).resize(rc * rh * rw);
float* base = (data->im_data_).data();
for (int i = 0; i < rc; ++i) {
cv::extractChannel(*im, cv::Mat(rh, rw, CV_32FC1, base + i * rh * rw), i);
}
}
void Resize::Run(cv::Mat* im, ImageBlob* data) {
auto resize_scale = GenerateScale(*im);
data->im_shape_ = {static_cast<float>(im->cols * resize_scale.first),
static_cast<float>(im->rows * resize_scale.second)};
data->in_net_shape_ = {static_cast<float>(im->cols * resize_scale.first),
static_cast<float>(im->rows * resize_scale.second)};
cv::resize(
*im, *im, cv::Size(), resize_scale.first, resize_scale.second, interp_);
data->im_shape_ = {
static_cast<float>(im->rows), static_cast<float>(im->cols),
};
data->scale_factor_ = {
resize_scale.second, resize_scale.first,
};
}
std::pair<float, float> Resize::GenerateScale(const cv::Mat& im) {
std::pair<float, float> resize_scale;
int origin_w = im.cols;
int origin_h = im.rows;
if (keep_ratio_) {
int im_size_max = std::max(origin_w, origin_h);
int im_size_min = std::min(origin_w, origin_h);
int target_size_max =
*std::max_element(target_size_.begin(), target_size_.end());
int target_size_min =
*std::min_element(target_size_.begin(), target_size_.end());
float scale_min =
static_cast<float>(target_size_min) / static_cast<float>(im_size_min);
float scale_max =
static_cast<float>(target_size_max) / static_cast<float>(im_size_max);
float scale_ratio = std::min(scale_min, scale_max);
resize_scale = {scale_ratio, scale_ratio};
} else {
resize_scale.first =
static_cast<float>(target_size_[1]) / static_cast<float>(origin_w);
resize_scale.second =
static_cast<float>(target_size_[0]) / static_cast<float>(origin_h);
}
return resize_scale;
}
void PadStride::Run(cv::Mat* im, ImageBlob* data) {
if (stride_ <= 0) {
return;
}
int rc = im->channels();
int rh = im->rows;
int rw = im->cols;
int nh = (rh / stride_) * stride_ + (rh % stride_ != 0) * stride_;
int nw = (rw / stride_) * stride_ + (rw % stride_ != 0) * stride_;
cv::copyMakeBorder(
*im, *im, 0, nh - rh, 0, nw - rw, cv::BORDER_CONSTANT, cv::Scalar(0));
data->in_net_shape_ = {
static_cast<float>(im->rows), static_cast<float>(im->cols),
};
}
void TopDownEvalAffine::Run(cv::Mat* im, ImageBlob* data) {
cv::resize(*im, *im, cv::Size(trainsize_[0], trainsize_[1]), 0, 0, interp_);
// todo: Simd::ResizeBilinear();
data->in_net_shape_ = {
static_cast<float>(trainsize_[1]), static_cast<float>(trainsize_[0]),
};
}
// Preprocessor op running order
const std::vector<std::string> Preprocessor::RUN_ORDER = {"InitInfo",
"TopDownEvalAffine",
"Resize",
"NormalizeImage",
"PadStride",
"Permute"};
void Preprocessor::Run(cv::Mat* im, ImageBlob* data) {
for (const auto& name : RUN_ORDER) {
if (ops_.find(name) != ops_.end()) {
ops_[name]->Run(im, data);
}
}
}
void CropImg(cv::Mat& img,
cv::Mat& crop_img,
std::vector<int>& area,
std::vector<float>& center,
std::vector<float>& scale,
float expandratio) {
int crop_x1 = std::max(0, area[0]);
int crop_y1 = std::max(0, area[1]);
int crop_x2 = std::min(img.cols - 1, area[2]);
int crop_y2 = std::min(img.rows - 1, area[3]);
int center_x = (crop_x1 + crop_x2) / 2.;
int center_y = (crop_y1 + crop_y2) / 2.;
int half_h = (crop_y2 - crop_y1) / 2.;
int half_w = (crop_x2 - crop_x1) / 2.;
if (half_h * 3 > half_w * 4) {
half_w = static_cast<int>(half_h * 0.75);
} else {
half_h = static_cast<int>(half_w * 4 / 3);
}
crop_x1 =
std::max(0, center_x - static_cast<int>(half_w * (1 + expandratio)));
crop_y1 =
std::max(0, center_y - static_cast<int>(half_h * (1 + expandratio)));
crop_x2 = std::min(img.cols - 1,
static_cast<int>(center_x + half_w * (1 + expandratio)));
crop_y2 = std::min(img.rows - 1,
static_cast<int>(center_y + half_h * (1 + expandratio)));
crop_img =
img(cv::Range(crop_y1, crop_y2 + 1), cv::Range(crop_x1, crop_x2 + 1));
center.clear();
center.emplace_back((crop_x1 + crop_x2) / 2);
center.emplace_back((crop_y1 + crop_y2) / 2);
scale.clear();
scale.emplace_back((crop_x2 - crop_x1));
scale.emplace_back((crop_y2 - crop_y1));
}
} // namespace PaddleDetection

View File

@@ -0,0 +1,49 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "include/utils.h"
namespace PaddleDetection {
void nms(std::vector<ObjectResult> &input_boxes, float nms_threshold) {
std::sort(input_boxes.begin(),
input_boxes.end(),
[](ObjectResult a, ObjectResult b) { return a.confidence > b.confidence; });
std::vector<float> vArea(input_boxes.size());
for (int i = 0; i < int(input_boxes.size()); ++i) {
vArea[i] = (input_boxes.at(i).rect[2] - input_boxes.at(i).rect[0] + 1)
* (input_boxes.at(i).rect[3] - input_boxes.at(i).rect[1] + 1);
}
for (int i = 0; i < int(input_boxes.size()); ++i) {
for (int j = i + 1; j < int(input_boxes.size());) {
float xx1 = (std::max)(input_boxes[i].rect[0], input_boxes[j].rect[0]);
float yy1 = (std::max)(input_boxes[i].rect[1], input_boxes[j].rect[1]);
float xx2 = (std::min)(input_boxes[i].rect[2], input_boxes[j].rect[2]);
float yy2 = (std::min)(input_boxes[i].rect[3], input_boxes[j].rect[3]);
float w = (std::max)(float(0), xx2 - xx1 + 1);
float h = (std::max)(float(0), yy2 - yy1 + 1);
float inter = w * h;
float ovr = inter / (vArea[i] + vArea[j] - inter);
if (ovr >= nms_threshold) {
input_boxes.erase(input_boxes.begin() + j);
vArea.erase(vArea.begin() + j);
}
else {
j++;
}
}
}
}
} // namespace PaddleDetection