Update for v1.0

James Deng 2024-05-30 23:19:44 +08:00
parent 46cf2741d2
commit adf80d91df
76 changed files with 1487 additions and 716 deletions

.gitignore vendored

@@ -1,8 +1,11 @@
.vs
.vscode
*.rc
out
build*
CMakeSettings.json
/data
/third_parties

.gitlab-ci.yml

@@ -163,18 +163,29 @@ x86_64-test-job: # This job runs in the test stage.
- build-job-x86_64
script:
- echo "Running unit tests ... (This will take several seconds.)"
- export SUPPORT_SHOW=-1
# smoke test for image classification
- export LD_LIBRARY_PATH=${ORT_HOME_X86}/lib:$LD_LIBRARY_PATH
- result=`demo/build.x86_64/classification_demo $DATA/models/squeezenet1.1-7.onnx $DATA/models/synset.txt $DATA/imgs/dog.jpg`
- $([[ ${result:0-30:8} == "Pembroke" ]])
# smoke test for object detection
- demo/build.x86_64/detection_demo $DATA/models/yolov6p5_n.q.onnx $DATA/models/coco.txt $DATA/imgs/person.jpg result0.jpg
- $([[ "b604ad08d6283995ec8dcdbddc2482c2" == "$(md5sum result0.jpg | awk '{print $1}')" ]])
- sed -i 's/\/usr/rootfs\/usr/g' $DATA/models/yolov6.json
- demo/build.x86_64/detection_demo $DATA/models/yolov6.json $DATA/imgs/person.jpg result0.jpg
- $([[ "b604ad08d6283995ec8dcdbddc2482c2" == "$(md5sum result0.jpg | awk '{print $1}')" ]])
- demo/build.x86_64/detection_video_demo $DATA/models/yolov6p5_n.q.onnx $DATA/models/coco.txt $DATA/videos/test.mp4 test.avi
- $([[ "760c5bb692fe302621265126f70bd7e9" == "$(md5sum test.avi | awk '{print $1}')" ]])
- export SUPPORT_SHOW=-1
- demo/build.x86_64/detection_stream_demo $DATA/models/yolov6p5_n.q.onnx $DATA/models/coco.txt $DATA/videos/test.mp4 video
- demo/build.x86_64/detection_stream_demo $DATA/models/yolov6p5_n.q.onnx $DATA/models/coco.txt $DATA/videos/test.mp4
- demo/build.x86_64/detection_stream_demo $DATA/models/yolov6.json $DATA/videos/test.mp4
# smoke test for human pose estimation
- demo/build.x86_64/estimation_demo $DATA/models/yolov6p5_n.q.onnx $DATA/models/coco.txt $DATA/models/rtmpose-t.q.onnx $DATA/imgs/person.jpg result1.jpg
- $([[ "dc531ec076418357e8478a0a2823285d" == "$(md5sum result1.jpg | awk '{print $1}')" ]])
- demo/build.x86_64/tracker_stream_demo $DATA/models/yolov6p5_n.q.onnx $DATA/models/coco.txt $DATA/models/rtmpose-t.q.onnx $DATA/videos/test.mp4 video
- $([[ "aa7a9492408bc41917178e242478a15c" == "$(md5sum result1.jpg | awk '{print $1}')" ]])
- sed -i 's/\/usr/rootfs\/usr/g' $DATA/models/rtmpose.json
- demo/build.x86_64/estimation_demo $DATA/models/yolov6.json $DATA/models/rtmpose.json $DATA/imgs/person.jpg result1.jpg
- $([[ "aa7a9492408bc41917178e242478a15c" == "$(md5sum result1.jpg | awk '{print $1}')" ]])
- demo/build.x86_64/tracker_stream_demo $DATA/models/yolov6p5_n.q.onnx $DATA/models/coco.txt $DATA/models/rtmpose-t.q.onnx $DATA/videos/test.mp4
- demo/build.x86_64/tracker_stream_demo $DATA/models/yolov6.json $DATA/models/rtmpose.json $DATA/videos/test.mp4
- echo "Running x86_64 tests done!"
<<: *only-defaults
@@ -184,12 +195,21 @@ riscv64-test-job: # This job runs in the test stage.
- build-job-riscv64
script:
- echo "Running unit tests ... (This will take several seconds.)"
# smoke test for image classification
- result=`${QEMU_CMD} demo/build.riscv64/classification_demo $DATA/models/squeezenet1.1-7.onnx $DATA/models/synset.txt $DATA/imgs/dog.jpg`
- $([[ ${result:0-30:8} == "Pembroke" ]])
# smoke test for object detection
- ${QEMU_CMD} demo/build.riscv64/detection_demo $DATA/models/yolov6p5_n.q.onnx $DATA/models/coco.txt $DATA/imgs/person.jpg result0.jpg
- $([[ "81d50314ee64d46c9d4c9941711820ba" == "$(md5sum result0.jpg | awk '{print $1}')" ]])
- sed -i 's/\/usr/rootfs\/usr/g' $DATA/models/yolov6.json
- ${QEMU_CMD} demo/build.riscv64/detection_demo $DATA/models/yolov6.json $DATA/imgs/person.jpg result0.jpg
- $([[ "81d50314ee64d46c9d4c9941711820ba" == "$(md5sum result0.jpg | awk '{print $1}')" ]])
# smoke test for human pose estimation
- ${QEMU_CMD} demo/build.riscv64/estimation_demo $DATA/models/yolov6p5_n.q.onnx $DATA/models/coco.txt $DATA/models/rtmpose-t.q.onnx $DATA/imgs/person.jpg result1.jpg
- $([[ "e2cb2b8d9e216d68ff653415f92be86c" == "$(md5sum result1.jpg | awk '{print $1}')" ]])
- $([[ "2becf268d0c8da27207ee1dec934e80b" == "$(md5sum result1.jpg | awk '{print $1}')" ]])
- sed -i 's/\/usr/rootfs\/usr/g' $DATA/models/rtmpose.json
- ${QEMU_CMD} demo/build.riscv64/estimation_demo $DATA/models/yolov6.json $DATA/models/rtmpose.json $DATA/imgs/person.jpg result0.jpg
- $([[ "2becf268d0c8da27207ee1dec934e80b" == "$(md5sum result0.jpg | awk '{print $1}')" ]])
- echo "Running riscv64 tests done!"
<<: *only-defaults

.gitmodules vendored Normal file

@@ -0,0 +1,4 @@
[submodule "pybind11"]
path = pybind11
url = https://github.com/pybind/pybind11.git
branch = v2.11

CMakeLists.txt

@@ -1,10 +1,18 @@
cmake_minimum_required(VERSION 3.10)
project(bianbuai)
add_subdirectory(${CMAKE_SOURCE_DIR}/src)
option(DEMO "option for Demo" OFF)
option(XDG "option for XDG autostart support" ON)
file(READ ${CMAKE_SOURCE_DIR}/VERSION_NUMBER VERSION_CONTENT)
string(STRIP "${VERSION_CONTENT}" VERSION_CONTENT)
project(bianbuai-support-library VERSION ${VERSION_CONTENT})
option(PYTHON "Option for Python" OFF)
option(DEMO "Option for Demo" OFF)
option(XDG "Option for XDG autostart support" OFF)
if (PYTHON)
add_subdirectory(${CMAKE_SOURCE_DIR}/pybind11)
endif()
add_subdirectory(${CMAKE_SOURCE_DIR}/src)
if (DEMO)
set(BIANBUAI_HOME ${CMAKE_SOURCE_DIR}) # useless but necessary
@@ -23,12 +31,3 @@ if (XDG)
install(DIRECTORY ${CMAKE_SOURCE_DIR}/rootfs/usr/share/icons DESTINATION share)
install(DIRECTORY ${CMAKE_SOURCE_DIR}/rootfs/etc DESTINATION ..)
endif()
# always install demo project with test data
#install(DIRECTORY ${CMAKE_SOURCE_DIR}/demo DESTINATION .)
#install(DIRECTORY ${CMAKE_SOURCE_DIR}/data DESTINATION demo)
option(TEST "option for Test" ON)
if (TEST)
add_subdirectory(${CMAKE_SOURCE_DIR}/tests)
endif()

MANIFEST.in Normal file

@@ -0,0 +1,11 @@
graft demo
graft include
graft python
graft src
graft tests
recursive-include pybind11/include/pybind11 *.h
recursive-include pybind11 *.py
recursive-include pybind11 py.typed
recursive-include pybind11 *.pyi
include pybind11/share/cmake/pybind11/*.cmake
include CMakeLists.txt

README.md

@@ -46,18 +46,18 @@ or
./detection_demo <model_file_path> <label_file_path> <image_file_path> <save_img_path>
or
./detection_demo <config_file_path> <image_file_path> <save_img_path>
./detection_stream_demo <model_file_path> <label_file_path> <input> <input_type> (video or camera_id) option(-h <resize_height>) option(-w <resize_width>)
./detection_stream_demo [-h <resize_height>] [-w <resize_width>] [-f] <model_file_path> <label_file_path> <input>
or
./detection_stream_demo <config_file_path> <input> <input_type> (video or camera_id) option(-h <resize_height>) option(-w <resize_width>)
./detection_stream_demo [-h <resize_height>] [-w <resize_width>] [-f] <config_file_path> <input>
./detection_video_demo <model_file_path> <label_file_path> <video_file_path> <dst_file_path> (end with .avi)
or
./detection_video_demo <config_file_path> <video_file_path> <dst_file_path> (end with .avi)
./estimation_demo <det_model_file_path> <det_label_file_path> <pose_model_file_path> <image_file_path> <save_img_path>
or
./estimation_demo <det_config_file_path> <pose_config_file_path> <image_file_path> <save_img_path>
./tracker_stream_demo <det_model_file_path> <det_label_file_path> <pose_model_file_path> <input> <input_type> (video or cameraId) option(-h <resize_height>) option(-w <resize_width>)
./tracker_stream_demo [-h <resize_height>] [-w <resize_width>] [-f] <det_model_file_path> <det_label_file_path> <pose_model_file_path> <input>
or
./tracker_stream_demo <det_config_file_path> <pose_config_file_path> <input> <input_type> (video or cameraId) option(-h <resize_height>) option(-w <resize_width>)
./tracker_stream_demo [-h <resize_height>] [-w <resize_width>] [-f] <det_config_file_path> <pose_config_file_path> <input>
```
### Using environment variables to implement functions
@@ -77,4 +77,8 @@ Model files format: [ONNX(Open Neural Network Exchange)](https://github.com/onnx
label files format: plain text, [here](https://github.com/microsoft/onnxruntime-inference-examples/blob/main/c_cxx/OpenVINO_EP/Linux/squeezenet_classification/synset.txt) is a recommended example
configuration files format: [json](https://github.com/nlohmann/json), a recommended configuration file is available [here](https://gitlab.dc.com:8443/bianbu/ai/support/-/blob/main/rootfs/usr/share/ai-support/models/yolov6.json)
### Python Support
See [README.md](./python/README.md) for more details.
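The bindings added in python/wrapper.cc (further down in this commit) take images as numpy arrays and return plain result structs. A minimal detection sketch, assuming the task and Box types are exported under the names used in the C++ API; their py::class_ registrations fall partly outside this excerpt:

```python
# Sketch only: ObjectDetectionTask/Box as exported names are assumptions;
# the option fields mirror the py::arg lists in python/wrapper.cc.
import cv2
import bianbuai

option = bianbuai.ObjectDetectionOption()
option.model_path = "data/models/yolov6p5_n.q.onnx"
option.label_path = "data/labels/coco.txt"

task = bianbuai.ObjectDetectionTask(option)
assert task.getInitFlag() == 0  # 0 means the session initialized correctly

img = cv2.imread("data/imgs/person.jpg")  # HxWx3 uint8, as the binding expects
for box in task.Detect(img):
    print(box.label_text, box.score, (box.x1, box.y1, box.x2, box.y2))
```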

VERSION_NUMBER

@@ -1 +1 @@
1.0.13
1.0.15

ci/test.sh

@@ -77,8 +77,8 @@ task_detection=(
"${BIN_DIR}/detection_demo data/models/nanodet-plus-m_320.onnx data/imgs/person0.jpg result0.jpg data/labels/coco.txt"
"${BIN_DIR}/detection_demo data/models/nanodet-plus-m_320.int8.onnx data/imgs/person0.jpg result0.int8.jpg data/labels/coco.txt"
"${BIN_DIR}/detection_video_demo data/models/nanodet-plus-m_320.int8.onnx data/labels/coco.txt data/videos/test.mp4 test.avi"
"${BIN_DIR}/detection_stream_demo data/models/nanodet-plus-m_320.int8.onnx data/labels/coco.txt data/videos/test.mp4 video"
"${BIN_DIR}/detection_stream_demo data/models/nanodet-plus-m_320.int8.onnx data/labels/coco.txt 0 cameraId"
"${BIN_DIR}/detection_stream_demo data/models/nanodet-plus-m_320.int8.onnx data/labels/coco.txt data/videos/test.mp4"
"${BIN_DIR}/detection_stream_demo data/models/nanodet-plus-m_320.int8.onnx data/labels/coco.txt 0"
)
function smoke_test() {
# preparation(e.g. download models)

debian/changelog vendored

@@ -1,3 +1,17 @@
bianbu-ai-support (1.0.15) mantic-porting; urgency=medium
[ bianbu-ci ]
* Sync change from bianbu-23.10/1.0.15
-- qinhongjie <hongjie.qin@spacemit.com> Tue, 13 May 2024 18:33:40 +0800
bianbu-ai-support (1.0.14) mantic-porting; urgency=medium
[ bianbu-ci ]
* Sync change from bianbu-23.10/1.0.14
-- qinhongjie <hongjie.qin@spacemit.com> Thu, 09 May 2024 16:33:40 +0800
bianbu-ai-support (1.0.13) mantic-porting; urgency=medium
[ bianbu-ci ]
@@ -31,7 +45,7 @@ bianbu-ai-support (1.0.9) mantic-spacemit; urgency=medium
[ bianbu-ci ]
* Sync change from bianbu-23.10/1.0.9
-- qinhongjie <hongjie.qin@spacemit.com> Fri, 8 Mar 2024 19:59:48 +0800
-- qinhongjie <hongjie.qin@spacemit.com> Fri, 08 Mar 2024 19:59:48 +0800
bianbu-ai-support (1.0.8) mantic-spacemit; urgency=medium
@@ -45,7 +59,7 @@ bianbu-ai-support (1.0.7) mantic-spacemit; urgency=medium
[ bianbu-ci ]
* Sync change from bianbu-23.10/1.0.7
-- qinhongjie <hongjie.qin@spacemit.com> Thu, 1 Feb 2024 18:14:18 +0800
-- qinhongjie <hongjie.qin@spacemit.com> Thu, 01 Feb 2024 18:14:18 +0800
bianbu-ai-support (1.0.6) mantic-spacemit; urgency=medium

debian/copyright vendored

@@ -5,66 +5,7 @@ Source: <url://example.com>
#
# Please double check copyright with the licensecheck(1) command.
Files: .gitlab-ci.yml
CMakeLists.txt
README.md
ci/dockerfile.riscv64
ci/dockerfile.x86_64
ci/test.sh
data/config/nanodet.json
data/config/yolov4.json
data/config/yolov6.json
data/imgs/bird.jpeg
data/imgs/cat.jpg
data/imgs/dog.jpg
data/imgs/person0.jpg
data/imgs/person1.jpg
data/imgs/person2.jpeg
data/labels/coco.txt
data/labels/synset.txt
demo/CMakeLists.txt
demo/README.md
demo/build.sh
demo/image_classification_demo.cc
demo/object_detection_demo.cc
demo/object_detection_stream_demo.cc
demo/object_detection_video_demo.cc
demo/utils/box_utils.h
demo/utils/check_utils.h
include/task/vision/image_classification_task.h
include/task/vision/image_classification_types.h
include/task/vision/object_detection_task.h
include/task/vision/object_detection_types.h
include/utils/time.h
include/utils/utils.h
src/CMakeLists.txt
src/core/engine.cc
src/core/engine.h
src/core/ort_wrapper.cc
src/core/ort_wrapper.h
src/processor/classification_postprocessor.cc
src/processor/classification_postprocessor.h
src/processor/classification_preprocessor.cc
src/processor/classification_preprocessor.h
src/processor/detection_postprocessor.cc
src/processor/detection_postprocessor.h
src/processor/detection_preprocessor.cc
src/processor/detection_preprocessor.h
src/processor/processor.h
src/task/core/base_task_api.h
src/task/vision/base_vision_task_api.h
src/task/vision/imageclassification/image_classification.cc
src/task/vision/imageclassification/image_classification.h
src/task/vision/imageclassification/image_classification_task.cc
src/task/vision/objectdetection/object_detection.cc
src/task/vision/objectdetection/object_detection.h
src/task/vision/objectdetection/object_detection_task.cc
src/tests/CMakeLists.txt
src/tests/json_test.cc
src/utils/cv2_utils.h
src/utils/json.hpp
src/utils/nms_utils.h
src/utils/utils.h
Files: TODO
Copyright: __NO_COPYRIGHT_NOR_LICENSE__
License: __NO_COPYRIGHT_NOR_LICENSE__

@@ -13,12 +13,13 @@
class DataLoader {
public:
DataLoader(const int& resize_height, const int& resize_width) {
DataLoader(const int& resize_height, const int& resize_width, const int& flip = 0) {
enable = true;
resize_height_ = resize_height;
resize_width_ = resize_width;
preview_fps_ = 0;
detection_fps_ = 0;
flip_ = flip;
}
~DataLoader() {}
bool ifEnable() { return enable; }
@@ -32,6 +33,9 @@ class DataLoader {
virtual cv::Mat fetchFrame() = 0;
virtual cv::Mat peekFrame() = 0;
protected:
int flip_{0};
private:
bool enable;
int resize_height_;
@@ -43,8 +47,8 @@ class DataLoader {
// Exclusive-access data loader
class ExclusiveDataLoader : public DataLoader {
public:
ExclusiveDataLoader(const int& resize_height, const int& resize_width)
: DataLoader(resize_height, resize_width) {}
ExclusiveDataLoader(const int& resize_height, const int& resize_width, const int& flip = 0)
: DataLoader(resize_height, resize_width, flip) {}
~ExclusiveDataLoader() {}
int init(const std::string& path) {
capture_.open(path);
@@ -67,6 +71,9 @@ class ExclusiveDataLoader : public DataLoader {
cv::Mat fetchFrame() {
cv::Mat frame;
capture_.read(frame);
if (flip_ && !frame.empty()) {
cv::flip(frame, frame, 1);
}
return frame;
}
cv::Mat peekFrame() { return fetchFrame(); }
@@ -110,8 +117,8 @@ inline bool isNumber(const std::string& str) {
// Shared data loader
class SharedDataLoader : public DataLoader {
public:
SharedDataLoader(const int& resize_height, const int& resize_width)
: DataLoader(resize_height, resize_width) {}
SharedDataLoader(const int& resize_height, const int& resize_width, const int& flip = 0)
: DataLoader(resize_height, resize_width, flip) {}
~SharedDataLoader() {}
int init(const std::string& path) {
@@ -151,6 +158,9 @@ class SharedDataLoader : public DataLoader {
capture_.read(frame);
if (!frame.empty()) {
frame_mutex_.lock();
if (flip_ && !frame.empty()) {
cv::flip(frame, frame, 1);
}
resizeUnscale(frame, frame_, getResizeHeight(), getResizeWidth());
frame_mutex_.unlock();
}

demo/image_classification_demo.cc

@@ -1,34 +1,43 @@
#include <iostream>
#include "task/vision/image_classification_task.h"
#include "utils/json_helper.hpp"
#ifdef DEBUG
#include "utils/time.h"
#include "utils/utils.h"
#endif
static void usage(const char* exe) {
std::cout << "Usage: \n"
<< exe << " <model_path> <label_path> <image_path>\n"
<< exe << " <config_path> <image_path>\n";
}
int main(int argc, char* argv[]) {
if (argc != 3 && argc != 4) {
usage(argv[0]);
return -1;
}
ImageClassificationOption option;
std::string config_file_path, image_file_path;
std::unique_ptr<ImageClassificationTask> imageclassificationtask;
std::string image_file_path;
if (argc == 3) {
config_file_path = argv[1];
if (configToOption(argv[1], option) != 0) {
return -1;
}
image_file_path = argv[2];
imageclassificationtask = std::unique_ptr<ImageClassificationTask>(
new ImageClassificationTask(config_file_path));
} else if (argc == 4) {
option.model_path = argv[1];
option.label_path = argv[2];
image_file_path = argv[3];
imageclassificationtask = std::unique_ptr<ImageClassificationTask>(
new ImageClassificationTask(option));
} else {
std::cout << "Please run with " << argv[0]
<< " <model_file_path> <label_file_path> <image_file_path> or "
<< argv[0] << " <config_file_path> <image_file_path>"
<< std::endl;
return -1;
}
std::unique_ptr<ImageClassificationTask> imageclassificationtask =
std::unique_ptr<ImageClassificationTask>(
new ImageClassificationTask(option));
if (imageclassificationtask->getInitFlag() != 0) {
return -1;
}
cv::Mat img_raw;
#ifdef DEBUG
std::cout << "." << std::endl;

demo/object_detection_demo.cc

@@ -3,43 +3,48 @@
#include "object_detection.hpp"
#include "task/vision/object_detection_task.h"
#include "utils/json_helper.hpp"
#ifdef DEBUG
#include "utils/time.h"
#include "utils/utils.h"
#endif
static void usage(const char* exe) {
std::cout << "Usage: \n"
<< exe << " <model_path> <label_path> <image_path> <save_path>\n"
<< exe << " <config_path> <image_path> <save_path>\n";
}
int main(int argc, char* argv[]) {
std::vector<Boxi> bboxes;
std::string image_file_path, save_img_path, config_file_path;
if (argc != 4 && argc != 5) {
usage(argv[0]);
return -1;
}
ObjectDetectionOption option;
std::unique_ptr<ObjectDetectionTask> objectdetectiontask;
cv::Mat img_raw;
#ifdef DEBUG
std::cout << "." << std::endl;
#endif
std::string image_file_path, save_img_path;
if (argc == 4) {
config_file_path = argv[1];
if (configToOption(argv[1], option) != 0) {
return -1;
}
image_file_path = argv[2];
save_img_path = argv[3];
objectdetectiontask = std::unique_ptr<ObjectDetectionTask>(
new ObjectDetectionTask(config_file_path));
} else if (argc == 5) {
option.model_path = argv[1];
option.label_path = argv[2];
image_file_path = argv[3];
save_img_path = argv[4];
objectdetectiontask =
std::unique_ptr<ObjectDetectionTask>(new ObjectDetectionTask(option));
} else {
std::cout << "Please run with " << argv[0]
<< " <model_file_path> <label_file_path> <image_file_path> "
"<save_img_path> or "
<< argv[0]
<< " <config_file_path> <image_file_path> <save_img_path>"
<< std::endl;
return -1;
}
std::unique_ptr<ObjectDetectionTask> objectdetectiontask =
std::unique_ptr<ObjectDetectionTask>(new ObjectDetectionTask(option));
if (objectdetectiontask->getInitFlag() != 0) {
return -1;
}
cv::Mat img_raw;
#ifdef DEBUG
std::cout << "." << std::endl;
#endif
{
#ifdef DEBUG
TimeWatcher t("|-- Load input data");
@@ -50,30 +55,23 @@ int main(int argc, char* argv[]) {
std::cout << "[ ERROR ] Read image failed" << std::endl;
return -1;
}
bboxes = objectdetectiontask->Detect(img_raw).result_bboxes;
{
#ifdef DEBUG
TimeWatcher t("|-- Output result");
#endif
for (size_t i = 0; i < bboxes.size(); i++) {
std::cout << "bbox[" << std::setw(2) << i << "]"
<< " "
<< "x1y1x2y2: "
<< "(" << std::setw(4) << bboxes[i].x1 << "," << std::setw(4)
<< bboxes[i].y1 << "," << std::setw(4) << bboxes[i].x2 << ","
<< std::setw(4) << bboxes[i].y2 << ")"
<< ", "
<< "score: " << std::fixed << std::setprecision(3)
<< std::setw(4) << bboxes[i].score << ", "
<< "label_text: " << bboxes[i].label_text << std::endl;
}
std::vector<Boxi> bboxes = objectdetectiontask->Detect(img_raw).result_bboxes;
for (size_t i = 0; i < bboxes.size(); i++) {
std::cout << "bbox[" << std::setw(2) << i << "] x1y1x2y2: (" << std::setw(4)
<< bboxes[i].x1 << "," << std::setw(4) << bboxes[i].y1 << ","
<< std::setw(4) << bboxes[i].x2 << "," << std::setw(4)
<< bboxes[i].y2 << "), score: " << std::fixed
<< std::setprecision(3) << std::setw(4) << bboxes[i].score
<< ", label_text: " << bboxes[i].label_text << std::endl;
}
{
#ifdef DEBUG
TimeWatcher t("|-- Box drawing");
#endif
draw_boxes_inplace(img_raw, bboxes);
}
try {
cv::imwrite(save_img_path, img_raw);
} catch (cv::Exception& e) {

demo/object_detection_stream_demo.cc

@@ -15,28 +15,19 @@
#include "opencv2/opencv.hpp"
#include "task/vision/object_detection_task.h"
#include "utils/cv_helper.hpp"
#include "utils/json_helper.hpp"
#ifdef DEBUG
#include "utils/time.h"
#endif
#include "utils/utils.h"
class Detector {
public:
explicit Detector(const std::string& config_file_path) {
config_file_path_ = config_file_path;
}
explicit Detector(ObjectDetectionOption& option) { option_ = option; }
~Detector() {}
// Initialize / deinitialize
int init() {
if (!config_file_path_.empty()) {
objectdetectiontask_ = std::unique_ptr<ObjectDetectionTask>(
new ObjectDetectionTask(config_file_path_));
} else {
objectdetectiontask_ = std::unique_ptr<ObjectDetectionTask>(
new ObjectDetectionTask(option_));
}
objectdetectiontask_ =
std::unique_ptr<ObjectDetectionTask>(new ObjectDetectionTask(option_));
return getInitFlag();
}
@@ -72,7 +63,6 @@ class Detector {
std::mutex objs_mutex_;
std::queue<struct ObjectDetectionResult> objs_array_;
std::unique_ptr<ObjectDetectionTask> objectdetectiontask_;
std::string config_file_path_;
ObjectDetectionOption option_;
};
@@ -159,24 +149,16 @@ void Preview(DataLoader& dataloader, Detector& detector) {
(objs.result_bboxes[i].y2 - dh) / resize_ratio;
}
}
{
#ifdef DEBUG
TimeWatcher t("|-- Output result");
#endif
for (size_t i = 0; i < objs.result_bboxes.size(); i++) {
std::cout << "bbox[" << std::setw(2) << i << "]"
<< " "
<< "x1y1x2y2: "
<< "(" << std::setw(4) << objs.result_bboxes[i].x1 << ","
<< std::setw(4) << objs.result_bboxes[i].y1 << ","
<< std::setw(4) << objs.result_bboxes[i].x2 << ","
<< std::setw(4) << objs.result_bboxes[i].y2 << ")"
<< ", "
<< "score: " << std::fixed << std::setprecision(3)
<< std::setw(4) << objs.result_bboxes[i].score << ", "
<< "label_text: " << objs.result_bboxes[i].label_text
<< std::endl;
}
for (size_t i = 0; i < objs.result_bboxes.size(); i++) {
std::cout << "bbox[" << std::setw(2) << i << "] x1y1x2y2: ("
<< std::setw(4) << objs.result_bboxes[i].x1 << ","
<< std::setw(4) << objs.result_bboxes[i].y1 << ","
<< std::setw(4) << objs.result_bboxes[i].x2 << ","
<< std::setw(4) << objs.result_bboxes[i].y2 << "), "
<< "score: " << std::fixed << std::setprecision(3)
<< std::setw(4) << objs.result_bboxes[i].score << ", "
<< "label_text: " << objs.result_bboxes[i].label_text
<< std::endl;
}
}
// Between calls to detector.detected and detector.get_object,
@@ -224,15 +206,23 @@ void Preview(DataLoader& dataloader, Detector& detector) {
}
}
static void usage(const char* exe) {
std::cout << "Usage: \n"
<< exe
<< " [-h <resize_height>] [-w <resize_width>] [-f] <model_path> "
"<label_path> <input>\n"
<< exe
<< " [-h <resize_height>] [-w <resize_width>] [-f] <config_path> "
"<input>\n";
}
int main(int argc, char* argv[]) {
cvConfig();
std::string config_file_path, input, input_type;
ObjectDetectionOption option;
int resize_height{320}, resize_width{320};
std::unique_ptr<Detector> detector;
int o;
const char* optstring = "w:h:";
std::string input;
int o, resize_height{320}, resize_width{320}, flip{0};
const char* optstring = "w:h:f";
while ((o = getopt(argc, argv, optstring)) != -1) {
switch (o) {
case 'w':
@@ -241,35 +231,31 @@ int main(int argc, char* argv[]) {
case 'h':
resize_height = atoi(optarg);
break;
case 'f':
flip = 1;
break;
case '?':
std::cout << "[ ERROR ] Unsupported usage" << std::endl;
break;
}
}
if (argc - optind == 3) {
config_file_path = argv[optind];
if (argc - optind == 2) {
if (configToOption(argv[optind], option) != 0) {
return -1;
}
input = argv[optind + 1];
input_type = argv[optind + 2];
detector = std::unique_ptr<Detector>(new Detector(config_file_path));
} else if (argc - optind == 4) {
} else if (argc - optind == 3) {
option.model_path = argv[optind];
option.label_path = argv[optind + 1];
input = argv[optind + 2];
input_type = argv[optind + 3];
detector = std::unique_ptr<Detector>(new Detector(option));
} else {
std::cout << "Please run with " << argv[0]
<< " <model_file_path> <label_file_path> <input> <input_type> "
"(video or camera_id) option(-h <resize_height>) option(-w "
"<resize_width>) or "
<< argv[0]
<< " <config_file_path> <input> <input_type> (video "
"or camera_id) option(-h <resize_height>) option(-w "
"<resize_width>)"
<< std::endl;
usage(argv[0]);
return -1;
}
SharedDataLoader dataloader{resize_height, resize_width};
std::unique_ptr<Detector> detector =
std::unique_ptr<Detector>(new Detector(option));
SharedDataLoader dataloader{resize_height, resize_width, flip};
if (dataloader.init(input) != 0) {
std::cout << "[ ERROR ] Dataloader init error" << std::endl;
return -1;

demo/object_detection_video_demo.cc

@@ -2,45 +2,45 @@
#include "object_detection.hpp"
#include "task/vision/object_detection_task.h"
#include "utils/json_helper.hpp"
#ifdef DEBUG
#include "utils/time.h"
#endif
#include "utils/utils.h"
static void usage(const char *exe) {
std::cout << "Usage: \n"
<< exe
<< " <model_path> <label_path> <video_path> <save_path>(*.avi)\n"
<< exe << " <config_path> <video_path> <save_path>(*.avi)\n";
}
int main(int argc, char *argv[]) {
std::unique_ptr<ObjectDetectionTask> objectdetectiontask;
std::string config_file_path, video_file_path, dst_file_path;
if (argc != 4 && argc != 5) {
usage(argv[0]);
return -1;
}
ObjectDetectionOption option;
#ifdef DEBUG
std::cout << "." << std::endl;
#endif
std::string video_file_path, dst_file_path;
if (argc == 4) {
config_file_path = argv[1];
if (configToOption(argv[1], option) != 0) {
return -1;
}
video_file_path = argv[2];
dst_file_path = argv[3];
objectdetectiontask = std::unique_ptr<ObjectDetectionTask>(
new ObjectDetectionTask(config_file_path));
} else if (argc == 5) {
option.model_path = argv[1];
option.label_path = argv[2];
video_file_path = argv[3];
dst_file_path = argv[4];
objectdetectiontask =
std::unique_ptr<ObjectDetectionTask>(new ObjectDetectionTask(option));
} else {
std::cout << "Please run with " << argv[0]
<< " <model_file_path> <label_file_path> <video_file_path> "
"<dst_file_path> (end with .avi) or "
<< argv[0]
<< " <config_file_path> <video_file_path> "
"<dst_file_path> (end with .avi)"
<< std::endl;
return -1;
}
std::unique_ptr<ObjectDetectionTask> objectdetectiontask =
std::unique_ptr<ObjectDetectionTask>(new ObjectDetectionTask(option));
if (objectdetectiontask->getInitFlag() != 0) {
return -1;
}
cv::VideoCapture capture(video_file_path);
if (!capture.isOpened()) {
std::cout << "[ ERROR ] Open video capture failed" << std::endl;
@@ -52,42 +52,43 @@ int main(int argc, char *argv[]) {
return -1;
}
double rate = capture.get(cv::CAP_PROP_FPS);
int delay = 1000 / rate;
// int delay = 1000 / rate;
int fps = rate;
int frameWidth = frame.cols;
int frameHeight = frame.rows;
cv::VideoWriter writer(dst_file_path,
cv::VideoWriter::fourcc('D', 'I', 'V', 'X'), fps,
cv::Size(frameWidth, frameHeight), 1);
#ifdef DEBUG
std::cout << "." << std::endl;
#endif
while (true) {
capture >> frame;
if (frame.empty()) {
break;
}
std::vector<Boxi> bboxes = objectdetectiontask->Detect(frame).result_bboxes;
{
#ifdef DEBUG
TimeWatcher t("|-- Output result");
#endif
for (size_t i = 0; i < bboxes.size(); i++) {
std::cout << "bbox[" << std::setw(2) << i << "]"
<< " "
<< "x1y1x2y2: "
<< "(" << std::setw(4) << bboxes[i].x1 << "," << std::setw(4)
<< bboxes[i].y1 << "," << std::setw(4) << bboxes[i].x2 << ","
<< std::setw(4) << bboxes[i].y2 << ")"
<< ", "
<< "score: " << std::fixed << std::setprecision(3)
<< std::setw(4) << bboxes[i].score << ", "
<< "label_text: " << bboxes[i].label_text << std::endl;
}
for (size_t i = 0; i < bboxes.size(); i++) {
std::cout << "bbox[" << std::setw(2) << i << "]"
<< " "
<< "x1y1x2y2: "
<< "(" << std::setw(4) << bboxes[i].x1 << "," << std::setw(4)
<< bboxes[i].y1 << "," << std::setw(4) << bboxes[i].x2 << ","
<< std::setw(4) << bboxes[i].y2 << ")"
<< ", "
<< "score: " << std::fixed << std::setprecision(3)
<< std::setw(4) << bboxes[i].score << ", "
<< "label_text: " << bboxes[i].label_text << std::endl;
}
draw_boxes_inplace(frame, bboxes);
writer.write(frame);
cv::waitKey(
delay); // image processing takes time, so the live preview runs slower than the saved video
// cv::imshow("Detection", frame);
// cv::waitKey(delay);
// cv::imshow("Detection", frame);
}
capture.release();
writer.release();
return 0;

demo/pose_estimation.hpp

@@ -8,16 +8,12 @@
#include "opencv2/opencv.hpp"
#include "task/vision/pose_estimation_types.h"
inline void draw_points_inplace(cv::Mat &img,
const std::vector<PosePoint> &points) {
std::vector<std::pair<int, int>> coco_17_joint_links = {
{0, 1}, {0, 2}, {1, 3}, {2, 4}, {5, 7}, {7, 9},
{6, 8}, {8, 10}, {5, 6}, {5, 11}, {6, 12}, {11, 12},
{11, 13}, {13, 15}, {12, 14}, {14, 16}};
for (size_t i = 0; i < points.size(); ++i) {
cv::circle(img, cv::Point(points[i].x, points[i].y), 2,
cv::Scalar{0, 0, 255}, 2, cv::LINE_AA);
}
static std::vector<std::pair<int, int>> coco_17_joint_links = {
{0, 1}, {0, 2}, {1, 3}, {2, 4}, {5, 7}, {7, 9}, {6, 8}, {8, 10},
{5, 6}, {5, 11}, {6, 12}, {11, 12}, {11, 13}, {13, 15}, {12, 14}, {14, 16}};
inline void draw_lines_inplace(cv::Mat &img,
const std::vector<PosePoint> &points) {
for (size_t i = 0; i < coco_17_joint_links.size(); ++i) {
std::pair<int, int> joint_links = coco_17_joint_links[i];
cv::line(
@@ -28,4 +24,13 @@ inline void draw_points_inplace(cv::Mat &img,
}
}
inline void draw_points_inplace(cv::Mat &img,
const std::vector<PosePoint> &points) {
for (size_t i = 0; i < points.size(); ++i) {
cv::circle(img, cv::Point(points[i].x, points[i].y), 2,
cv::Scalar{0, 0, 255}, 2, cv::LINE_AA);
}
draw_lines_inplace(img, points);
}
#endif // SUPPORT_DEMO_POSE_ESTIMATION_HPP_

@@ -1,57 +1,60 @@
#include <iomanip> // for: setprecision
#include <iostream>
#include "pose_estimation.hpp"
#include "task/vision/object_detection_task.h"
#include "task/vision/pose_estimation_task.h"
#include "utils/time.h"
#include "utils/utils.h"
#include "utils/json_helper.hpp"
static void usage(const char* exe) {
std::cout << "Usage: \n"
<< exe
<< " <detection_model_path> <detection_label_path> "
"<pose_point_model_path> <image_path> <save_path>\n"
<< exe
<< " <detection_config_path> <pose_point_config_path> <image_path> "
"<save_path>\n";
}
int main(int argc, char* argv[]) {
std::vector<std::pair<int, int>> coco_17_joint_links = {
{0, 1}, {0, 2}, {1, 3}, {2, 4}, {5, 7}, {7, 9},
{6, 8}, {8, 10}, {5, 6}, {5, 11}, {6, 12}, {11, 12},
{11, 13}, {13, 15}, {12, 14}, {14, 16}};
std::vector<PosePoint> points;
std::vector<Boxi> bboxes;
std::string det_file_path, pose_file_path, image_file_path, save_img_path;
PoseEstimationOption estimation_option;
if (argc != 5 && argc != 6) {
usage(argv[0]);
return -1;
}
ObjectDetectionOption detection_option;
std::unique_ptr<ObjectDetectionTask> objectdetectiontask;
std::unique_ptr<PoseEstimationTask> poseestimationtask;
cv::Mat img_raw, img;
#ifdef DEBUG
std::cout << "." << std::endl;
#endif
PoseEstimationOption estimation_option;
std::string image_file_path, save_img_path;
if (argc == 5) {
det_file_path = argv[1];
pose_file_path = argv[2];
if (configToOption(argv[1], detection_option) != 0 ||
configToOption(argv[2], estimation_option) != 0) {
return -1;
}
image_file_path = argv[3];
save_img_path = argv[4];
objectdetectiontask = std::unique_ptr<ObjectDetectionTask>(
new ObjectDetectionTask(det_file_path));
poseestimationtask = std::unique_ptr<PoseEstimationTask>(
new PoseEstimationTask(pose_file_path));
} else if (argc == 6) {
detection_option.model_path = argv[1];
detection_option.label_path = argv[2];
estimation_option.model_path = argv[3];
image_file_path = argv[4];
save_img_path = argv[5];
objectdetectiontask = std::unique_ptr<ObjectDetectionTask>(
new ObjectDetectionTask(detection_option));
poseestimationtask = std::unique_ptr<PoseEstimationTask>(
new PoseEstimationTask(estimation_option));
} else {
std::cout << "Please run with " << argv[0]
<< " <det_model_file_path> <det_label_file_path> "
"<pose_model_file_path> <image_file_path> "
"<save_img_path> or "
<< argv[0]
<< " <det_config_file_path> <pose_config_file_path> "
"<image_file_path> <save_img_path>"
<< std::endl;
}
std::unique_ptr<ObjectDetectionTask> objectdetectiontask =
std::unique_ptr<ObjectDetectionTask>(
new ObjectDetectionTask(detection_option));
std::unique_ptr<PoseEstimationTask> poseestimationtask =
std::unique_ptr<PoseEstimationTask>(
new PoseEstimationTask(estimation_option));
if (objectdetectiontask->getInitFlag() != 0 ||
poseestimationtask->getInitFlag() != 0) {
return -1;
}
cv::Mat img_raw;
#ifdef DEBUG
std::cout << "." << std::endl;
#endif
{
#ifdef DEBUG
TimeWatcher t("|-- Load input data");
@@ -61,51 +64,19 @@ int main(int argc, char* argv[]) {
std::cout << "[ ERROR ] Read image failed" << std::endl;
return -1;
}
resizeUnscale(img_raw, img, 320, 320);
}
if (objectdetectiontask->getInitFlag() != 0) {
return -1;
}
bboxes = objectdetectiontask->Detect(img).result_bboxes;
if (poseestimationtask->getInitFlag() != 0) {
return -1;
}
Boxi box;
std::vector<Boxi> bboxes = objectdetectiontask->Detect(img_raw).result_bboxes;
for (size_t i = 0; i < bboxes.size(); i++) {
box = bboxes[i];
Boxi box = bboxes[i];
if (box.label != 0) {
continue;
}
points = poseestimationtask->Estimate(img, box).result_points;
if (points.size()) {
int input_height = 320;
int input_width = 320;
int img_height = img_raw.rows;
int img_width = img_raw.cols;
float resize_ratio = std::min(
static_cast<float>(input_height) / static_cast<float>(img_height),
static_cast<float>(input_width) / static_cast<float>(img_width));
float dw = (input_width - resize_ratio * img_width) / 2;
float dh = (input_height - resize_ratio * img_height) / 2;
for (size_t i = 0; i < points.size(); i++) {
points[i].x = (points[i].x - dw) / resize_ratio;
points[i].y = (points[i].y - dh) / resize_ratio;
}
}
for (size_t i = 0; i < points.size(); ++i) {
cv::circle(img_raw, cv::Point(points[i].x, points[i].y), 2,
cv::Scalar{0, 0, 255}, 2, cv::LINE_AA);
}
for (size_t i = 0; i < coco_17_joint_links.size(); ++i) {
std::pair<int, int> joint_links = coco_17_joint_links[i];
cv::line(
img_raw,
cv::Point(points[joint_links.first].x, points[joint_links.first].y),
cv::Point(points[joint_links.second].x, points[joint_links.second].y),
cv::Scalar{0, 255, 0}, 2, cv::LINE_AA);
}
std::vector<PosePoint> points =
poseestimationtask->Estimate(img_raw, box).result_points;
draw_points_inplace(img_raw, points);
}
try {
cv::imwrite(save_img_path, img_raw);
} catch (cv::Exception& e) {

@@ -16,18 +16,13 @@
#include "task/vision/object_detection_task.h"
#include "task/vision/pose_estimation_task.h"
#include "utils/cv_helper.hpp"
#include "utils/json_helper.hpp"
#ifdef DEBUG
#include "utils/time.h"
#endif
#include "utils/utils.h"
class Tracker {
public:
Tracker(const std::string& det_file_path, const std::string& pose_file_path) {
det_file_path_ = det_file_path;
pose_file_path_ = pose_file_path;
}
Tracker(const ObjectDetectionOption& detection_option,
const PoseEstimationOption& estimation_option) {
detection_option_ = detection_option;
@@ -36,17 +31,11 @@ class Tracker {
~Tracker() {}
// Initialize / deinitialize
int init() {
if (!det_file_path_.empty()) {
objectdetectiontask_ = std::unique_ptr<ObjectDetectionTask>(
new ObjectDetectionTask(det_file_path_));
poseestimationtask_ = std::unique_ptr<PoseEstimationTask>(
new PoseEstimationTask(pose_file_path_));
} else {
objectdetectiontask_ = std::unique_ptr<ObjectDetectionTask>(
new ObjectDetectionTask(detection_option_));
poseestimationtask_ = std::unique_ptr<PoseEstimationTask>(
new PoseEstimationTask(estimation_option_));
}
objectdetectiontask_ = std::unique_ptr<ObjectDetectionTask>(
new ObjectDetectionTask(detection_option_));
poseestimationtask_ = std::unique_ptr<PoseEstimationTask>(
new PoseEstimationTask(estimation_option_));
return getInitFlag();
}
@@ -102,8 +91,6 @@ class Tracker {
std::queue<struct PoseEstimationResult> poses_array_;
std::unique_ptr<ObjectDetectionTask> objectdetectiontask_;
std::unique_ptr<PoseEstimationTask> poseestimationtask_;
std::string pose_file_path_;
std::string det_file_path_;
ObjectDetectionOption detection_option_;
PoseEstimationOption estimation_option_;
};
@@ -236,16 +223,25 @@ void Preview(DataLoader& dataloader, Tracker& tracker) {
}
}
static void usage(const char* exe) {
std::cout << "Usage: \n"
<< exe
<< " [-h <resize_height>] [-w <resize_width>] [-f] "
"<detection_model_path> <detection_label_path> "
"<pose_point_model_path> <input>\n"
<< exe
<< " [-h <resize_height>] [-w <resize_width>] [-f] "
"<detection_config_path> <pose_point_config_path> <input>\n";
}
int main(int argc, char* argv[]) {
cvConfig();
std::string det_file_path, pose_file_path, input, input_type;
int resize_height{320}, resize_width{320};
ObjectDetectionOption detection_option;
PoseEstimationOption estimation_option;
std::unique_ptr<Tracker> tracker;
int o;
const char* optstring = "w:h:";
std::string input;
int o, resize_height{320}, resize_width{320}, flip{0};
const char* optstring = "w:h:f";
while ((o = getopt(argc, argv, optstring)) != -1) {
switch (o) {
case 'w':
@@ -254,40 +250,33 @@ int main(int argc, char* argv[]) {
case 'h':
resize_height = atoi(optarg);
break;
case 'f':
flip = 1;
break;
case '?':
std::cout << "[ ERROR ] Unsupported usage" << std::endl;
break;
}
}
if (argc - optind == 4) {
det_file_path = argv[optind];
pose_file_path = argv[optind + 1];
if (argc - optind == 3) {
if (configToOption(argv[optind], detection_option) != 0 ||
configToOption(argv[optind + 1], estimation_option) != 0) {
return -1;
}
input = argv[optind + 2];
input_type = argv[optind + 3];
tracker =
std::unique_ptr<Tracker>(new Tracker(det_file_path, pose_file_path));
} else if (argc - optind == 5) {
} else if (argc - optind == 4) {
detection_option.model_path = argv[optind];
detection_option.label_path = argv[optind + 1];
estimation_option.model_path = argv[optind + 2];
input = argv[optind + 3];
input_type = argv[optind + 4];
tracker = std::unique_ptr<Tracker>(
new Tracker(detection_option, estimation_option));
} else {
std::cout
<< "Please run with " << argv[0]
<< " <det_model_file_path> <det_label_file_path> "
"<pose_model_file_path> <input> <input_type> (video or cameraId "
"option(-h <resize_height>) option(-w <resize_width>) or "
<< argv[0]
<< " <det_config_file_path> <pose_config_file_path> <input> "
"<input_type> (video or cameraId option(-h <resize_height>) "
"option(-w <resize_width>)"
<< std::endl;
usage(argv[0]);
return -1;
}
SharedDataLoader dataloader{resize_height, resize_width};
std::unique_ptr<Tracker> tracker = std::unique_ptr<Tracker>(
new Tracker(detection_option, estimation_option));
SharedDataLoader dataloader{resize_height, resize_width, flip};
if (dataloader.init(input) != 0) {
std::cout << "[ ERROR ] dataloader init error" << std::endl;
return -1;

@@ -28,9 +28,9 @@ void cvConfig() {
}
// smoke test to init opencv internal threads
cv::parallel_for_(cv::Range(0, 16), [&](const cv::Range range) {
#if 0
std::ostringstream out;
out << "Thread " << cv::getThreadNum() << "(opencv=" << cv::utils::getThreadID() << "): range " << range.start << "-" << range.end << std::endl;
#if 0
std::cout << out.str() << std::flush;
std::this_thread::sleep_for(std::chrono::milliseconds(100));
#endif

demo/utils/json_helper.hpp

@@ -0,0 +1,95 @@
#ifndef SUPPORT_DEMO_UTILS_JSON_HELPER_HPP_
#define SUPPORT_DEMO_UTILS_JSON_HELPER_HPP_
#include <fstream>
#include <iostream>
#include <string>
#include "json.hpp"
#include "task/vision/image_classification_types.h"
#include "task/vision/object_detection_types.h"
#include "task/vision/pose_estimation_types.h"
using json = nlohmann::json;
static int getConfig(const char* config_file_path, json& config) {
std::ifstream f(config_file_path);
try {
config = json::parse(f);
} catch (json::parse_error& ex) {
std::cout << "[ ERROR ] Init fail, parse json config file fail"
<< std::endl;
return 0;
}
return 1;
}
int configToOption(const char* config_file_path,
ImageClassificationOption& option) {
json config;
if (!getConfig(config_file_path, config)) {
return -1;
}
std::string model_path = config["model_path"];
option.model_path = model_path;
std::string label_path = config["label_path"];
option.label_path = label_path;
if (config.contains("intra_threads_num")) {
option.intra_threads_num = config["intra_threads_num"];
}
if (config.contains("inter_threads_num")) {
option.inter_threads_num = config["inter_threads_num"];
}
return 0;
}
int configToOption(const char* config_file_path,
ObjectDetectionOption& option) {
json config;
if (!getConfig(config_file_path, config)) {
return -1;
}
std::string model_path = config["model_path"];
option.model_path = model_path;
std::string label_path = config["label_path"];
option.label_path = label_path;
if (config.contains("intra_threads_num")) {
option.intra_threads_num = config["intra_threads_num"];
}
if (config.contains("inter_threads_num")) {
option.inter_threads_num = config["inter_threads_num"];
}
if (config.contains("score_threshold")) {
option.score_threshold = config["score_threshold"];
}
if (config.contains("nms_threshold")) {
option.nms_threshold = config["nms_threshold"];
}
if (config.contains("class_name_whitelist")) {
option.class_name_whitelist =
config["class_name_whitelist"].get<std::vector<int>>();
}
if (config.contains("class_name_blacklist")) {
option.class_name_blacklist =
config["class_name_blacklist"].get<std::vector<int>>();
}
return 0;
}
int configToOption(const char* config_file_path, PoseEstimationOption& option) {
json config;
if (!getConfig(config_file_path, config)) {
return -1;
}
std::string model_path = config["model_path"];
option.model_path = model_path;
if (config.contains("intra_threads_num")) {
option.intra_threads_num = config["intra_threads_num"];
}
if (config.contains("inter_threads_num")) {
option.inter_threads_num = config["inter_threads_num"];
}
return 0;
}
#endif // SUPPORT_DEMO_UTILS_JSON_HELPER_HPP_
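configToOption above reads model_path (and label_path for the classification and detection overloads) unconditionally, so those keys must be present; everything else is optional. A sketch of generating a config the detection overload would accept, with key names taken from the parser above and illustrative values:

```python
# Writes a config consumable by configToOption(..., ObjectDetectionOption&).
import json

config = {
    "model_path": "data/models/yolov6p5_n.q.onnx",  # required by the parser
    "label_path": "data/labels/coco.txt",           # required by the parser
    "intra_threads_num": 2,                         # optional
    "inter_threads_num": 2,                         # optional
    "score_threshold": 0.3,                         # optional, detection only
    "nms_threshold": 0.45,                          # optional, detection only
    "class_name_whitelist": [0],                    # optional, class ids (ints)
}

with open("yolov6.json", "w") as f:
    json.dump(config, f, indent=2)
```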

include/task/vision/image_classification_task.h

@@ -2,14 +2,12 @@
#define SUPPORT_INCLUDE_TASK_VISION_IMAGE_CLASSIFICATION_TASK_H_
#include <memory> // for: shared_ptr
#include <string>
#include "opencv2/opencv.hpp"
#include "task/vision/image_classification_types.h"
class ImageClassificationTask {
public:
explicit ImageClassificationTask(const std::string& config_file_path);
explicit ImageClassificationTask(const ImageClassificationOption& option);
~ImageClassificationTask() = default;
ImageClassificationResult Classify(const cv::Mat& img_raw);

include/task/vision/image_classification_types.h

@@ -15,6 +15,17 @@ struct ImageClassificationOption {
std::string label_path;
int intra_threads_num = 2;
int inter_threads_num = 2;
ImageClassificationOption()
: model_path(""),
label_path(""),
intra_threads_num(2),
inter_threads_num(2) {}
ImageClassificationOption(const std::string mp, const std::string lp,
const int atm, const int etm)
: model_path(mp),
label_path(lp),
intra_threads_num(atm),
inter_threads_num(etm) {}
};
#endif // SUPPORT_INCLUDE_TASK_VISION_IMAGE_CLASSIFICATION_TYPES_H_
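The added constructor is the one python/wrapper.cc binds with PYARG_COMMON, so the option can also be built in a single keyword call from Python. A sketch, assuming the class is re-exported through the bianbuai package as the star imports in python/bianbuai suggest:

```python
# Keyword names follow the py::arg list (PYARG_COMMON) in python/wrapper.cc.
from bianbuai import ImageClassificationOption  # assumed export path

opt = ImageClassificationOption(model_path="data/models/squeezenet1.1-7.onnx",
                                label_path="data/models/synset.txt",
                                intra_threads_num=2,
                                inter_threads_num=2)
```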

include/task/vision/object_detection_task.h

@@ -2,15 +2,13 @@
#define SUPPORT_INCLUDE_TASK_VISION_OBJECT_DETECTION_TASK_H_
#include <memory> // for: shared_ptr
#include <string>
#include <vector> //for: vector
#include <vector> // for: vector
#include "opencv2/opencv.hpp"
#include "task/vision/object_detection_types.h"
class ObjectDetectionTask {
public:
explicit ObjectDetectionTask(const std::string &config_file_path);
explicit ObjectDetectionTask(const ObjectDetectionOption &option);
~ObjectDetectionTask() = default;
ObjectDetectionResult Detect(const cv::Mat &img_raw);

include/task/vision/object_detection_types.h

@@ -169,6 +169,23 @@ struct ObjectDetectionOption {
float nms_threshold = -1.f;
std::vector<int> class_name_whitelist;
std::vector<int> class_name_blacklist;
ObjectDetectionOption()
: model_path(""),
label_path(""),
intra_threads_num(2),
inter_threads_num(2) {}
ObjectDetectionOption(const std::string mp, const std::string lp,
const int atm, const int etm, const float st,
const float nt, const std::vector<int>& cnw,
const std::vector<int>& cnb)
: model_path(mp),
label_path(lp),
intra_threads_num(atm),
inter_threads_num(etm),
score_threshold(st),
nms_threshold(nt),
class_name_whitelist(cnw),
class_name_blacklist(cnb) {}
};
#endif // SUPPORT_INCLUDE_TASK_VISION_OBJECT_DETECTION_TYPES_H_

include/task/vision/pose_estimation_task.h

@@ -2,15 +2,13 @@
#define SUPPORT_INCLUDE_TASK_VISION_POSE_ESTIMATION_TASK_H_
#include <memory> // for: shared_ptr
#include <string>
#include "opencv2/opencv.hpp"
#include "task/vision/object_detection_types.h"
#include "task/vision/object_detection_types.h" // for: Boxi
#include "task/vision/pose_estimation_types.h"
class PoseEstimationTask {
public:
explicit PoseEstimationTask(const std::string &config_file_path);
explicit PoseEstimationTask(const PoseEstimationOption &option);
~PoseEstimationTask() = default;
int getInitFlag();

include/task/vision/pose_estimation_types.h

@@ -10,11 +10,8 @@ struct PosePoint {
int y;
float score;
PosePoint() {
x = 0;
y = 0;
score = 0.0;
}
PosePoint() : x(0), y(0), score(0.f) {}
PosePoint(int x, int y, float score) : x(x), y(y), score(score) {}
};
typedef PosePoint Vector2D;
@@ -28,6 +25,10 @@ struct PoseEstimationOption {
std::string model_path;
int intra_threads_num = 2;
int inter_threads_num = 2;
PoseEstimationOption()
: model_path(""), intra_threads_num(2), inter_threads_num(2) {}
PoseEstimationOption(const std::string mp, const int atm, const int etm)
: model_path(mp), intra_threads_num(atm), inter_threads_num(etm) {}
};
#endif // SUPPORT_INCLUDE_TASK_VISION_POSE_ESTIMATION_TYPES_H_

include/utils/utils.h

@@ -1,24 +1,9 @@
#ifndef SUPPORT_INCLUDE_UTILS_UTILS_H_
#define SUPPORT_INCLUDE_UTILS_UTILS_H_
#include <algorithm>
#include <string>
#include "opencv2/opencv.hpp"
#include "task/vision/image_classification_types.h"
#include "task/vision/object_detection_types.h"
#include "task/vision/pose_estimation_types.h"
void resizeUnscale(const cv::Mat &mat, cv::Mat &mat_rs, int target_height,
int target_width);
int configToOption(const std::string &config_file_path,
PoseEstimationOption &option);
int configToOption(const std::string &config_file_path,
ObjectDetectionOption &option);
int configToOption(const std::string &config_file_path,
ImageClassificationOption &option);
#endif // SUPPORT_INCLUDE_UTILS_UTILS_H_

python/README.md Normal file

@@ -0,0 +1,48 @@
## prepare env
```shell
# for ubuntu
sudo apt install python3-dev python3-pip cmake gcc g++ onnxruntime
# for centos
sudo yum install python3-devel python3-pip cmake gcc gcc-c++ onnxruntime
python3 -m pip install wheel setuptools
```
## quick build
* cmake project
```shell
git submodule update --init --recursive
mkdir build && cd build
# Note: static opencv libraries are required
cmake .. -DORT_HOME=${PATH_TO_ONNXRUNTIME} -DOpenCV_DIR=${PATH_TO_OPENCV_CMAKE_DIR} -DPYTHON=ON
make -j`nproc` bianbuai_pybind11_state VERBOSE=1
# Or
cmake --build . --config Release --verbose -j`nproc` --target bianbuai_pybind11_state
cmake --install . --config Release --verbose --component pybind11 # --strip
```
* python package
```shell
export ORT_HOME=${PATH_TO_ONNXRUNTIME}
export OPENCV_DIR=${PATH_TO_OPENCV_CMAKE_DIR}
python python/setup.py sdist bdist_wheel
```
## smoke unittest
```shell
# prepare env, e.g. with ubuntu22.04
python3 -m pip install opencv-python
# or just
sudo apt install python3-opencv
# run unittest under the build directory
ln -sf ../rootfs/usr/share/ai-support data
cp ../tests/python/test_python_task.py .
python3 test_python_task.py
```

python/bianbuai/__init__.py

@@ -0,0 +1 @@
from bianbuai.capi._pybind_state import * # noqa

python/bianbuai/capi/__init__.py

@@ -0,0 +1,3 @@
# -------------------------------------------------------------------------
# Copyright (c) SpacemiT Corporation. All rights reserved.
# --------------------------------------------------------------------------

python/bianbuai/capi/_ld_preload.py

@@ -0,0 +1,6 @@
# -------------------------------------------------------------------------
# Copyright (c) SpacemiT Corporation. All rights reserved.
# --------------------------------------------------------------------------
# This file can be modified by setup.py when building a manylinux2010 wheel
# When modified, it will preload some libraries needed for the python C extension

python/bianbuai/capi/_pybind_state.py

@@ -0,0 +1,10 @@
# -------------------------------------------------------------------------
# Copyright (c) SpacemiT Corporation. All rights reserved.
# --------------------------------------------------------------------------
"""
Ensure that dependencies are available and then load the extension module.
"""
from . import _ld_preload # noqa: F401
from .bianbuai_pybind11_state import * # noqa

@@ -0,0 +1,19 @@
if (NOT DEFINED OpenCV_SHARED OR OpenCV_SHARED STREQUAL "ON")
message(WARNING "Linking static OpenCV libraries is recommended for the Python binding")
endif()
file(GLOB_RECURSE BIANBU_PYBIND_SRCS "${CMAKE_SOURCE_DIR}/python/*.cc")
list(APPEND BIANBU_PYBIND_SRCS ${BIANBU_SRC_FILES})
pybind11_add_module(bianbuai_pybind11_state ${BIANBU_PYBIND_SRCS})
target_include_directories(bianbuai_pybind11_state PRIVATE ${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR})
target_include_directories(bianbuai_pybind11_state SYSTEM PRIVATE ${OPENCV_INC})
target_include_directories(bianbuai_pybind11_state SYSTEM PRIVATE ${ORT_HOME}/include ${ORT_HOME}/include/onnxruntime)
target_link_libraries(bianbuai_pybind11_state PRIVATE ${HIDE_SYMBOLS_LINKER_FLAGS})
target_link_libraries(bianbuai_pybind11_state PRIVATE ${TARGET_SHARED_LINKER_FLAGS})
target_link_libraries(bianbuai_pybind11_state PRIVATE ${SPACEMITEP_LIB} onnxruntime ${OPENCV_LIBS})
install(TARGETS bianbuai_pybind11_state
RUNTIME COMPONENT pybind11 DESTINATION bin
LIBRARY COMPONENT pybind11 DESTINATION lib
ARCHIVE COMPONENT pybind11 DESTINATION lib)

python/setup.py Normal file

@@ -0,0 +1,208 @@
import os, sys
import subprocess
from collections import namedtuple
from pathlib import Path
import shutil
from setuptools import Extension, setup, find_packages
from setuptools.command.build_ext import build_ext
#from setuptools.command.install import install as InstallCommandBase
#from setuptools.command.install_lib import install_lib as InstallLibCommandBase
package_name = "bianbuai"
SCRIPT_DIR = os.path.dirname(__file__)
TOP_DIR = os.path.realpath(os.path.join(SCRIPT_DIR, ".."))
# Convert distutils Windows platform specifiers to CMake -A arguments
PLAT_TO_CMAKE = {
"win32": "Win32",
"win-amd64": "x64",
"win-arm32": "ARM",
"win-arm64": "ARM64",
}
class CMakeExtension(Extension):
def __init__(self, name: str, sourcedir: str = "") -> None:
super().__init__(name, sources=[])
self.sourcedir = TOP_DIR # os.fspath(Path(sourcedir).resolve())
class CMakeBuild(build_ext):
def build_extension(self, ext: CMakeExtension) -> None:
# Must be in this form due to bug in .resolve() only fixed in Python 3.10+
ext_fullpath = Path.cwd() / self.get_ext_fullpath(ext.name)
extdir = ext_fullpath.parent.resolve() / package_name / "capi"
# Using this requires trailing slash for auto-detection & inclusion of
# auxiliary "native" libs
debug = int(os.environ.get("DEBUG", 0)) if self.debug is None else self.debug
cfg = "Debug" if debug else "Release"
# CMake lets you override the generator - we need to check this.
# Can be set with Conda-Build, for example.
cmake_generator = os.environ.get("CMAKE_GENERATOR", "")
# Set Python_EXECUTABLE instead if you use PYBIND11_FINDPYTHON
# EXAMPLE_VERSION_INFO shows you how to pass a value into the C++ code
# from Python.
cmake_args = [
#f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={extdir}{os.sep}",
f"-DPYTHON_EXECUTABLE={sys.executable}",
f"-DCMAKE_BUILD_TYPE={cfg}", # not used on MSVC, but no harm
]
build_args = ["--target", "bianbuai_pybind11_state"]
install_args = ["--component", "pybind11", "--prefix", "."]
# Adding CMake arguments set as environment variable
# (needed e.g. to build for ARM OSx on conda-forge)
if "CMAKE_ARGS" in os.environ:
cmake_args += [item for item in os.environ["CMAKE_ARGS"].split(" ") if item]
# Pass other necessary args for pybind11 cmake project
onnxruntime_install_dir = os.environ.get("ORT_HOME", "/usr")
opencv4_cmake_dir = os.environ.get("OPENCV_DIR", "")
cmake_args += [
f"-DOpenCV_DIR={opencv4_cmake_dir}",
f"-DORT_HOME={onnxruntime_install_dir}",
f"-DPYTHON=ON",
]
if self.compiler.compiler_type != "msvc":
# Using Ninja-build since it a) is available as a wheel and b)
# multithreads automatically. MSVC would require all variables be
# exported for Ninja to pick it up, which is a little tricky to do.
# Users can override the generator with CMAKE_GENERATOR in CMake
# 3.15+.
if not cmake_generator or cmake_generator == "Ninja":
try:
import ninja
ninja_executable_path = Path(ninja.BIN_DIR) / "ninja"
cmake_args += [
"-GNinja",
f"-DCMAKE_MAKE_PROGRAM:FILEPATH={ninja_executable_path}",
]
except ImportError:
pass
else:
# Single config generators are handled "normally"
single_config = any(x in cmake_generator for x in {"NMake", "Ninja"})
# CMake allows an arch-in-generator style for backward compatibility
contains_arch = any(x in cmake_generator for x in {"ARM", "Win64"})
# Specify the arch if using MSVC generator, but only if it doesn't
# contain a backward-compatibility arch spec already in the
# generator name.
if not single_config and not contains_arch:
cmake_args += ["-A", PLAT_TO_CMAKE[self.plat_name]]
# Multi-config generators have a different way to specify configs
if not single_config:
cmake_args += [
f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{cfg.upper()}={extdir}"
]
build_args += ["--config", cfg]
# Set CMAKE_BUILD_PARALLEL_LEVEL to control the parallel build level
# across all generators.
if "CMAKE_BUILD_PARALLEL_LEVEL" not in os.environ:
# self.parallel is a Python 3 only way to set parallel jobs by hand
# using -j in the build_ext call, not supported by pip or PyPA-build.
if hasattr(self, "parallel") and self.parallel:
# CMake 3.12+ only.
build_args += [f"-j{self.parallel}"]
build_temp = Path(self.build_temp) / ext.name
if not build_temp.exists():
build_temp.mkdir(parents=True)
subprocess.run(
["cmake", ext.sourcedir, *cmake_args], cwd=build_temp, check=True
)
#print("cmake args:", cmake_args)
subprocess.run(
["cmake", "--build", ".", *build_args], cwd=build_temp, check=True
)
#print("build args:", build_args)
subprocess.run(
["cmake", "--install", ".", *install_args], cwd=build_temp, check=True
)
#print("install args:", install_args)
# Copy installed libraries files to Python modules directory
shutil.copytree(os.path.join(build_temp, "lib"), f"{extdir}", dirs_exist_ok=True,)
# Get Git Version
try:
git_version = subprocess.check_output(['git', 'rev-parse', 'HEAD'], cwd=TOP_DIR).decode('ascii').strip()
except (OSError, subprocess.CalledProcessError):
git_version = None
print("GIT VERSION:", git_version)
# Get Release Version Number
with open(os.path.join(TOP_DIR, 'VERSION_NUMBER')) as version_file:
VersionInfo = namedtuple('VersionInfo', ['version', 'git_version'])(
version=version_file.read().strip(),
git_version=git_version
)
# Description
README = os.path.join(TOP_DIR, "README.md")
with open(README, encoding="utf-8") as fdesc:
long_description = fdesc.read()
classifiers = [
'Development Status :: 5 - Production/Stable',
'Intended Audience :: Developers',
'Intended Audience :: Education',
'Intended Audience :: Science/Research',
'Topic :: Scientific/Engineering',
'Topic :: Scientific/Engineering :: Mathematics',
'Topic :: Scientific/Engineering :: Artificial Intelligence',
'Topic :: Software Development',
'Topic :: Software Development :: Libraries',
'Topic :: Software Development :: Libraries :: Python Modules',
'Programming Language :: Python',
'Programming Language :: Python :: 3 :: Only',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: 3.9',
'Programming Language :: Python :: 3.10',
'Programming Language :: Python :: 3.11',
]
if __name__ == "__main__":
# The information here can also be placed in setup.cfg - better separation of
# logic and declaration, and simpler if you include description/version in a file.
setup(
name=package_name,
version=VersionInfo.version,
description="Bianbu AI Support Python Package",
long_description=long_description,
long_description_content_type="text/markdown",
setup_requires=[],
tests_require=[],
cmdclass={"build_ext": CMakeBuild},
packages=find_packages("python", exclude=[]),
package_dir={"":"python"}, # tell distutils packages are under "python/"
ext_modules=[CMakeExtension(package_name)],
#package_data={},
include_package_data=True,
license='Apache License v2.0',
author="bianbu-ai-support",
author_email="bianbu-ai-support@spacemit.com",
url='https://gitlab.dc.com:8443/bianbu/ai/support',
install_requires=[],
entry_points={},
scripts=[],
python_requires=">=3.6",
classifiers=classifiers,
zip_safe=False,
)

python/utils.h Normal file

@@ -0,0 +1,36 @@
#ifndef SUPPORT_PYTHON_UTILS_H_
#define SUPPORT_PYTHON_UTILS_H_
#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
#include <string>
#include "opencv2/opencv.hpp"
namespace py = pybind11;
struct Box {
int x1;
int y1;
int x2;
int y2;
float score;
std::string label_text;
unsigned int label;
Box(int x1 = 0, int y1 = 0, int x2 = 0, int y2 = 0, float score = 0.0,
std::string label_text = "", unsigned int label = 0)
: x1(x1),
y1(y1),
x2(x2),
y2(y2),
score(score),
label_text(label_text),
label(label) {}
};
cv::Mat numpy_uint8_3c_to_cv_mat(const py::array_t<unsigned char> &input) {
py::buffer_info buf = input.request();
cv::Mat mat(buf.shape[0], buf.shape[1], CV_8UC3, (unsigned char *)buf.ptr);
return mat;
}
#endif // SUPPORT_PYTHON_UTILS_H_
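numpy_uint8_3c_to_cv_mat wraps the numpy buffer in place (shape[0] as rows, shape[1] as columns, no copy), so callers must hand it a dense HxWx3 uint8 array. That is what cv2.imread returns; a quick Python-side sanity check, as a sketch:

```python
# The cv::Mat above aliases the numpy buffer, so the array must be uint8,
# 3-channel, and C-contiguous before crossing the pybind11 boundary.
import cv2
import numpy as np

img = cv2.imread("data/imgs/person.jpg")  # dtype uint8, shape (H, W, 3), BGR
assert img is not None and img.dtype == np.uint8 and img.shape[2] == 3
img = np.ascontiguousarray(img)           # guarantees a dense buffer for buf.ptr
```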

176
python/wrapper.cc Normal file
View file

@@ -0,0 +1,176 @@
#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include "task/vision/image_classification_task.h"
#include "task/vision/image_classification_types.h"
#include "task/vision/object_detection_task.h"
#include "task/vision/object_detection_types.h"
#include "task/vision/pose_estimation_task.h"
#include "task/vision/pose_estimation_types.h"
#include "utils.h"
namespace py = pybind11;
struct PYImageClassificationTask {
public:
PYImageClassificationTask(const ImageClassificationOption &option) {
task_ = std::make_shared<ImageClassificationTask>(option);
}
int getInitFlag() { return task_->getInitFlag(); }
ImageClassificationResult Classify(const py::array_t<unsigned char> &input) {
cv::Mat img = numpy_uint8_3c_to_cv_mat(input);
return task_->Classify(img);
}
private:
std::shared_ptr<ImageClassificationTask> task_;
};
struct PYObjectDetectionTask {
public:
PYObjectDetectionTask(const ObjectDetectionOption &option) {
task_ = std::make_shared<ObjectDetectionTask>(option);
}
int getInitFlag() { return task_->getInitFlag(); }
std::vector<Box> Detect(const py::array_t<unsigned char> &input) {
cv::Mat img = numpy_uint8_3c_to_cv_mat(input);
ObjectDetectionResult result = task_->Detect(img);
std::vector<Box> result_boxes;
for (size_t i = 0; i < result.result_bboxes.size(); i++) {
Box box;
box.x1 = result.result_bboxes[i].x1;
box.y1 = result.result_bboxes[i].y1;
box.x2 = result.result_bboxes[i].x2;
box.y2 = result.result_bboxes[i].y2;
box.label_text = result.result_bboxes[i].label_text;
box.label = result.result_bboxes[i].label;
box.score = result.result_bboxes[i].score;
result_boxes.push_back(box);
}
return result_boxes;
}
private:
std::shared_ptr<ObjectDetectionTask> task_;
};
struct PYPoseEstimationTask {
public:
PYPoseEstimationTask(const PoseEstimationOption &option) {
task_ = std::make_shared<PoseEstimationTask>(option);
}
int getInitFlag() { return task_->getInitFlag(); }
std::vector<PosePoint> Estimate(const py::array_t<unsigned char> &input,
const Box &box) {
cv::Mat img = numpy_uint8_3c_to_cv_mat(input);
Boxi boxi;
boxi.x1 = box.x1;
boxi.y1 = box.y1;
boxi.x2 = box.x2;
boxi.y2 = box.y2;
boxi.label_text = box.label_text.c_str();
boxi.label = box.label;
boxi.score = box.score;
PoseEstimationResult result = task_->Estimate(img, boxi);
return result.result_points;
}
private:
std::shared_ptr<PoseEstimationTask> task_;
};
#define PYARG_COMMON \
py::arg("model_path") = "", py::arg("label_path") = "", \
py::arg("intra_threads_num") = 2, py::arg("inter_threads_num") = 2
#define PYARG_DETECT \
py::arg("score_threshold") = -1.f, py::arg("nms_threshold") = -1.f, \
py::arg("class_name_whitelist"), py::arg("class_name_blacklist")
#define PYARG_HUMAN_POSE \
py::arg("model_path") = "", py::arg("intra_threads_num") = 2, \
py::arg("inter_threads_num") = 2
#define PYARG_COMMENT(x) "Option for " #x
PYBIND11_MODULE(bianbuai_pybind11_state, m) {
py::class_<ImageClassificationOption>(m, "ImageClassificationOption")
.def(py::init())
.def(py::init<const std::string &, const std::string &, const int &,
const int &>(),
PYARG_COMMENT(ImageClassificationTask), PYARG_COMMON)
.def_readwrite("model_path", &ImageClassificationOption::model_path)
.def_readwrite("label_path", &ImageClassificationOption::label_path)
.def_readwrite("intra_threads_num",
&ImageClassificationOption::intra_threads_num)
.def_readwrite("inter_threads_num",
&ImageClassificationOption::inter_threads_num);
py::class_<ImageClassificationResult>(m, "ImageClassificationResult")
.def_readwrite("label_text", &ImageClassificationResult::label_text)
.def_readwrite("label_index", &ImageClassificationResult::label)
.def_readwrite("score", &ImageClassificationResult::score);
py::class_<PYImageClassificationTask>(m, "ImageClassificationTask")
.def(py::init<const ImageClassificationOption &>())
.def("getInitFlag", &PYImageClassificationTask::getInitFlag)
.def("Classify", &PYImageClassificationTask::Classify);
py::class_<ObjectDetectionOption>(m, "ObjectDetectionOption")
.def(py::init())
.def(py::init<const std::string &, const std::string &, const int &,
const int &, const int, const int, const std::vector<int> &,
const std::vector<int> &>(),
PYARG_COMMENT(ObjectDetectionTask), PYARG_COMMON, PYARG_DETECT)
.def_readwrite("model_path", &ObjectDetectionOption::model_path)
.def_readwrite("label_path", &ObjectDetectionOption::label_path)
.def_readwrite("intra_threads_num",
&ObjectDetectionOption::intra_threads_num)
.def_readwrite("inter_threads_num",
&ObjectDetectionOption::inter_threads_num)
.def_readwrite("score_threshold", &ObjectDetectionOption::score_threshold)
.def_readwrite("nms_threshold", &ObjectDetectionOption::nms_threshold)
.def_readwrite("class_name_whitelist",
&ObjectDetectionOption::class_name_whitelist)
.def_readwrite("class_name_blacklist",
&ObjectDetectionOption::class_name_blacklist);
py::class_<Box>(m, "Box")
.def(py::init<int, int, int, int, float, std::string, unsigned int>(),
"Box Info for Object Detection Task", py::arg("x1") = 0,
py::arg("y1") = 0, py::arg("x2") = 0, py::arg("y2") = 0,
py::arg("score") = 0.f, py::arg("label_text") = "",
py::arg("label") = 0)
.def_readwrite("x1", &Box::x1)
.def_readwrite("y1", &Box::y1)
.def_readwrite("x2", &Box::x2)
.def_readwrite("y2", &Box::y2)
.def_readwrite("score", &Box::score)
.def_readwrite("label_text", &Box::label_text)
.def_readwrite("label", &Box::label);
py::class_<PYObjectDetectionTask>(m, "ObjectDetectionTask")
.def(py::init<const ObjectDetectionOption &>())
.def("getInitFlag", &PYObjectDetectionTask::getInitFlag)
.def("Detect", &PYObjectDetectionTask::Detect);
py::class_<PoseEstimationOption>(m, "PoseEstimationOption")
.def(py::init())
.def(py::init<const std::string &, const int &, const int &>(),
PYARG_COMMENT(PoseEstimationTask), PYARG_HUMAN_POSE)
.def_readwrite("model_path", &PoseEstimationOption::model_path)
.def_readwrite("intra_threads_num",
&PoseEstimationOption::intra_threads_num)
.def_readwrite("inter_threads_num",
&PoseEstimationOption::inter_threads_num);
py::class_<PYPoseEstimationTask>(m, "PoseEstimationTask")
.def(py::init<const PoseEstimationOption &>())
.def("getInitFlag", &PYPoseEstimationTask::getInitFlag)
.def("Estimate", &PYPoseEstimationTask::Estimate);
py::class_<PosePoint>(m, "PosePoint")
.def(py::init())
.def(py::init<int, int, float>(),
"2D Point with score for Pose Estimation Task", py::arg("x") = 0,
py::arg("y") = 0, py::arg("score") = 0.f)
.def_readwrite("x", &PosePoint::x)
.def_readwrite("y", &PosePoint::y)
.def_readwrite("score", &PosePoint::score);
}
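Because the option constructors above are registered with named py::arg entries, they accept keyword arguments from Python. A minimal sketch against the raw pybind11 module (model, label, and image paths are placeholders):

    import cv2
    import bianbuai_pybind11_state as bianbuai

    # Keyword form enabled by the PYARG_COMMON argument names above.
    option = bianbuai.ImageClassificationOption(
        model_path="models/squeezenet1.1-7.onnx",
        label_path="models/synset.txt",
        intra_threads_num=2, inter_threads_num=2)
    task = bianbuai.ImageClassificationTask(option)
    assert task.getInitFlag() == 0  # 0 signals successful initialization
    result = task.Classify(cv2.imread("imgs/dog.jpg"))
    print(result.label_text, result.label_index, result.score)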

View file

@@ -1,10 +1,10 @@
[Desktop Entry]
Type=Application
#Encoding=UTF-8
Version=1.0.13
Version=1.0.15
Name=object-detection
Name[en]=object-detection
Name[zh_CN]=目标检测
Exec=/usr/bin/detection_stream_demo /usr/share/ai-support/models/yolov6.json 0 camera_id
Exec=/usr/bin/detection_stream_demo /usr/share/ai-support/models/yolov6.json 0
Terminal=true
Icon=/usr/share/icons/bianbu-ai-demo/object-detection.png

View file

@@ -1,10 +1,10 @@
[Desktop Entry]
Type=Application
#Encoding=UTF-8
Version=1.0.13
Version=1.0.15
Name=pose-tracker
Name[en]=pose-tracker
Name[zh_CN]=姿态跟踪
Exec=/usr/bin/tracker_stream_demo /usr/share/ai-support/models/yolov6.json /usr/share/ai-support/models/rtmpose.json 0 cameraId
Exec=/usr/bin/tracker_stream_demo /usr/share/ai-support/models/yolov6.json /usr/share/ai-support/models/rtmpose.json 0
Terminal=true
Icon=/usr/share/icons/bianbu-ai-demo/pose-tracker.png

View file

@@ -1,7 +1,3 @@
file(READ ${CMAKE_SOURCE_DIR}/VERSION_NUMBER VERSION_CONTENT)
string(STRIP "${VERSION_CONTENT}" VERSION_CONTENT)
project(bianbuai-support-library VERSION ${VERSION_CONTENT})
set(CMAKE_CXX_STANDARD 14)
#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fpermissive")
@@ -46,8 +42,8 @@ endif()
link_directories(${OPENCV_LIB})
link_directories(${ORT_HOME}/lib)
file(GLOB_RECURSE SRC_FILES "./*.cpp" "./*.c" "./*.cc")
add_library(bianbuai SHARED ${SRC_FILES})
file(GLOB_RECURSE BIANBU_SRC_FILES "${CMAKE_SOURCE_DIR}/src/*.cc")
add_library(bianbuai SHARED ${BIANBU_SRC_FILES})
set_target_properties(bianbuai PROPERTIES VERSION ${PROJECT_VERSION} SOVERSION ${PROJECT_VERSION_MAJOR})
if (EXISTS "${ORT_HOME}/lib/libspacemit_ep.so")
@@ -57,7 +53,7 @@ endif()
target_include_directories(bianbuai PUBLIC ${CMAKE_SOURCE_DIR}/include)
target_include_directories(bianbuai PRIVATE ${CMAKE_SOURCE_DIR})
target_include_directories(bianbuai SYSTEM PUBLIC ${OPENCV_INC})
target_include_directories(bianbuai SYSTEM PUBLIC ${OPENCV_INC}) # for demo
target_include_directories(bianbuai SYSTEM PRIVATE ${ORT_HOME}/include ${ORT_HOME}/include/onnxruntime)
set(HIDE_SYMBOLS_LINKER_FLAGS "-Wl,--exclude-libs,ALL")
@@ -83,9 +79,6 @@ install(DIRECTORY ${CMAKE_SOURCE_DIR}/include/task ${CMAKE_SOURCE_DIR}/include/u
DESTINATION include/bianbuai
FILES_MATCHING PATTERN "*.h")
#install(DIRECTORY ${ORT_HOME}/include ${ORT_HOME}/lib
# DESTINATION lib/3rdparty/onnxruntime
# FILES_MATCHING PATTERN "*.h" PATTERN "*onnxruntime*.so*" PATTERN "*spacemit*.so*")
#if (EXISTS "${ORT_HOME}/bin")
# install(DIRECTORY ${ORT_HOME}/bin DESTINATION lib/3rdparty/onnxruntime)
#endif()
if (PYTHON)
include(${CMAKE_SOURCE_DIR}/python/bianbuai_pybind11_state.cmake)
endif()
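Note that the bindings are opt-in: presumably the project is configured with -DPYTHON=ON so that the bianbuai_pybind11_state target from python/bianbuai_pybind11_state.cmake is pulled into the build.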

View file

@@ -1,7 +1,6 @@
#include "src/core/engine.h"
#include "engine.h"
#include "src/utils/utils.h"
#include "utils/utils.h"
int Engine::Init(const std::string &instance_name,
const std::string &model_file_path,

View file

@@ -5,7 +5,8 @@
#include <vector>
#include "opencv2/opencv.hpp"
#include "src/core/ort_wrapper.h"
#include "ort_wrapper.h"
class Engine {
public:
Engine() { OrtWrapper ortwrapper_; }

View file

@@ -1,9 +1,9 @@
#include "src/core/ort_wrapper.h"
#include "ort_wrapper.h"
#include <stdlib.h> // for: getenv atoi
#include <memory>
#include <utility> // for move
#include <utility> // for: move
#ifdef _WIN32
#include "src/utils/utils.h"
@@ -89,6 +89,43 @@ int OrtWrapper::Init(const std::string &instance_name,
return -1;
}
session_ = std::move(session);
// init onnxruntime allocator.
Ort::AllocatorWithDefaultOptions allocator;
// input names initial and build
num_inputs_ = session_->GetInputCount();
input_node_names_.resize(num_inputs_);
input_names_.resize(num_inputs_, "");
for (size_t i = 0; i < num_inputs_; ++i) {
auto input_name = session_->GetInputNameAllocated(i, allocator);
input_names_[i].append(input_name.get());
input_node_names_[i] = input_names_[i].c_str();
}
// input node dims and input dims
input_node_dims_ = GetInputDims();
// input tensor size
input_tensor_size_.resize(input_node_dims_.size());
for (size_t i = 0; i < num_inputs_; ++i) {
input_tensor_size_[i] = 1;
for (size_t j = 0; j < input_node_dims_[i].size(); ++j) {
input_tensor_size_[i] *= input_node_dims_[i][j];
}
}
// output names initial and build
num_outputs_ = session_->GetOutputCount();
output_node_names_.resize(num_outputs_);
output_names_.resize(num_outputs_, "");
for (size_t i = 0; i < num_outputs_; ++i) {
auto output_name = session_->GetOutputNameAllocated(i, allocator);
output_names_[i].append(output_name.get());
output_node_names_[i] = output_names_[i].c_str();
}
return 0;
}
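The net effect of this hunk: input/output names, node dims, and tensor sizes are now queried from the session once during Init and cached in the new members added to ort_wrapper.h below, instead of being rebuilt on every Invoke call (see the matching deletions in the next hunk).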
@@ -125,66 +162,20 @@ std::vector<Ort::Value> OrtWrapper::Invoke(
#ifdef DEBUG
TimeWatcher t("|-- Infer tensor");
#endif
// init onnxruntime allocator.
Ort::AllocatorWithDefaultOptions allocator;
// input names initial and build
std::vector<const char *> input_node_names;
std::vector<std::string> input_names;
size_t num_inputs = session_->GetInputCount();
input_node_names.resize(num_inputs);
for (size_t i = 0; i < num_inputs; i++) {
input_names.push_back(std::string(""));
}
for (size_t i = 0; i < num_inputs; ++i) {
auto input_name = session_->GetInputNameAllocated(i, allocator);
input_names[i].append(input_name.get());
input_node_names[i] = input_names[i].c_str();
}
// input node dims and input dims
auto input_node_dims = GetInputDims();
// input tensor size
std::vector<size_t> input_tensor_size;
input_tensor_size.resize(input_node_dims.size());
for (size_t i = 0; i < num_inputs; ++i) {
input_tensor_size[i] = 1;
for (size_t j = 0; j < input_node_dims[i].size(); ++j) {
input_tensor_size[i] *= input_node_dims[i][j];
}
}
// output names initial and build
std::vector<const char *> output_node_names;
std::vector<std::string> output_names;
size_t num_outputs = session_->GetOutputCount();
output_node_names.resize(num_outputs);
for (size_t i = 0; i < num_outputs; i++) {
output_names.push_back(std::string(""));
}
for (size_t i = 0; i < num_outputs; ++i) {
auto output_name = session_->GetOutputNameAllocated(i, allocator);
output_names[i].append(output_name.get());
output_node_names[i] = output_names[i].c_str();
}
// init and build input tensors
std::vector<Ort::Value> input_tensors;
Ort::MemoryInfo memoryInfo = Ort::MemoryInfo::CreateCpu(
OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault);
for (size_t i = 0; i < num_inputs; i++) {
for (size_t i = 0; i < num_inputs_; i++) {
input_tensors.push_back(Ort::Value::CreateTensor<float>(
memoryInfo, input_tensor_values[i].data(), input_tensor_size[i],
input_node_dims[i].data(), input_node_dims[i].size()));
memoryInfo, input_tensor_values[i].data(), input_tensor_size_[i],
input_node_dims_[i].data(), input_node_dims_[i].size()));
}
// run model
auto outputTensors = session_->Run(
Ort::RunOptions{nullptr}, input_node_names.data(), input_tensors.data(),
num_inputs, output_node_names.data(), num_outputs);
Ort::RunOptions{nullptr}, input_node_names_.data(), input_tensors.data(),
num_inputs_, output_node_names_.data(), num_outputs_);
return outputTensors;
}

View file

@@ -1,10 +1,7 @@
#ifndef SUPPORT_SRC_CORE_ORT_WRAPPER_H_
#define SUPPORT_SRC_CORE_ORT_WRAPPER_H_
#include <cmath>
#include <iostream>
#include <memory>
#include <numeric>
#include <string>
#include <vector>
@@ -31,5 +28,13 @@ class OrtWrapper {
std::unique_ptr<Ort::Env> env_;
Ort::SessionOptions sessionOptions_;
std::unique_ptr<Ort::Session> session_;
std::vector<const char*> input_node_names_;
std::vector<std::string> input_names_;
std::vector<const char*> output_node_names_;
std::vector<std::string> output_names_;
size_t num_inputs_;
size_t num_outputs_;
std::vector<size_t> input_tensor_size_;
std::vector<std::vector<int64_t>> input_node_dims_;
};
#endif // SUPPORT_SRC_CORE_ORT_WRAPPER_H_

View file

@@ -1,6 +1,8 @@
#include "src/processor/classification_postprocessor.h"
#include "classification_postprocessor.h"
#include <limits> // for numeric_limits<>
#include <cmath> // for: std::exp
#include <limits> // for: numeric_limits<>
#include <stdexcept> // for: runtime_error
#include "utils/time.h"

View file

@@ -1,14 +1,11 @@
#ifndef SUPPORT_SRC_PROCESSOR_CLASSIFICATION_POSTPROCESSOR_H_
#define SUPPORT_SRC_PROCESSOR_CLASSIFICATION_POSTPROCESSOR_H_
#include <cmath>
#include <iostream>
#include <stdexcept> // To use runtime_error
#include <string>
#include <vector>
#include "onnxruntime_cxx_api.h"
#include "src/processor/processor.h"
#include "processor.h"
#include "task/vision/image_classification_types.h"
class ClassificationPostprocessor : public Postprocessor {

View file

@@ -1,4 +1,4 @@
#include "src/processor/classification_preprocessor.h"
#include "classification_preprocessor.h"
#include "utils/time.h"

View file

@@ -1,14 +1,10 @@
#ifndef SUPPORT_SRC_PROCESSOR_CLASSIFICATION_PREPROCESSOR_H_
#define SUPPORT_SRC_PROCESSOR_CLASSIFICATION_PREPROCESSOR_H_
#include <fstream>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>
#include "opencv2/opencv.hpp"
#include "src/processor/processor.h"
#include "processor.h"
class ClassificationPreprocessor : public Preprocessor {
public:

View file

@@ -1,5 +1,8 @@
#include "src/processor/detection_postprocessor.h"
#include "detection_postprocessor.h"
#include <math.h> // for: exp
#include "src/utils/utils.h"
#include "utils/time.h"
#include "utils/utils.h"

View file

@@ -1,15 +1,11 @@
#ifndef SUPPORT_SRC_PROCESSOR_DETECTION_POSTPROCESSOR_H_
#define SUPPORT_SRC_PROCESSOR_DETECTION_POSTPROCESSOR_H_
#include <algorithm>
#include <iostream>
#include <string>
#include <vector>
#include "onnxruntime_cxx_api.h"
#include "src/processor/processor.h"
#include "processor.h"
#include "src/utils/nms_utils.h"
#include "src/utils/utils.h"
#include "task/vision/object_detection_types.h"
class DetectionPostprocessor : public Postprocessor {

View file

@@ -1,4 +1,4 @@
#include "src/processor/detection_preprocessor.h"
#include "detection_preprocessor.h"
#include "utils/time.h"
#include "utils/utils.h"

View file

@@ -1,12 +1,10 @@
#ifndef SUPPORT_SRC_PROCESSOR_DETECTION_PREPROCESSOR_H_
#define SUPPORT_SRC_PROCESSOR_DETECTION_PREPROCESSOR_H_
#include <chrono>
#include <string>
#include <vector>
#include "opencv2/opencv.hpp"
#include "src/processor/processor.h"
#include "processor.h"
#include "src/utils/cv2_utils.h"
#include "src/utils/nms_utils.h"

View file

@@ -1,4 +1,4 @@
#include "src/processor/estimation_postprocessor.h"
#include "estimation_postprocessor.h"
#include "utils/time.h"
@@ -21,8 +21,8 @@ void EstimationPostprocessor::Postprocess(
int extend_width = simcc_x_dims[2];
int extend_height = simcc_y_dims[2];
float *simcc_x_result = output_tensors[0].GetTensorMutableData<float>();
float *simcc_y_result = output_tensors[1].GetTensorMutableData<float>();
const float *simcc_x_result = output_tensors[0].GetTensorData<float>();
const float *simcc_y_result = output_tensors[1].GetTensorData<float>();
for (int i = 0; i < joint_num; ++i) {
// find the maximum value and its index within each extend_width row

View file

@@ -1,15 +1,12 @@
#ifndef SUPPORT_SRC_PROCESSOR_ESTIMATION_POSTPROCESSOR_H_
#define SUPPORT_SRC_PROCESSOR_ESTIMATION_POSTPROCESSOR_H_
#include <algorithm>
#include <iostream>
#include <string>
#include <utility> // for std::pair
#include <vector>
#include "opencv2/opencv.hpp"
#include "onnxruntime_cxx_api.h"
#include "src/processor/processor.h"
#include "src/utils/nms_utils.h"
#include "src/utils/utils.h"
#include "task/vision/pose_estimation_types.h"
class EstimationPostprocessor : public Postprocessor {

View file

@@ -1,9 +1,8 @@
#include "src/processor/estimation_preprocessor.h"
#include "estimation_preprocessor.h"
#include "src/utils/cv2_utils.h"
#include "src/utils/nms_utils.h"
#include "utils/time.h"
#include "utils/utils.h"
void EstimationPreprocessor::Preprocess(
const cv::Mat& mat, const Boxi& box,

View file

@@ -2,7 +2,6 @@
#define SUPPORT_SRC_PROCESSOR_ESTIMATION_PREPROCESSOR_H_
#include <chrono>
#include <string>
#include <utility>
#include <vector>

View file

@@ -1,9 +1,6 @@
#ifndef _BASE_VISION_TASK_API_H_
#define _BASE_VISION_TASK_API_H_
#include <string>
#include <vector>
#include "opencv2/opencv.hpp"
#include "src/task/core/base_task_api.h"
#include "task/vision/object_detection_types.h"

View file

@@ -1,9 +1,9 @@
#include "src/task/vision/imageclassification/image_classification.h"
#include "image_classification.h"
#include <iostream>
#include "src/utils/utils.h"
#include "utils/time.h"
#include "utils/utils.h"
int ImageClassification::InitFromOption(
const ImageClassificationOption &option) {

View file

@@ -1,22 +1,13 @@
#ifndef SUPPORT_SRC_TASK_VISION_ImageClassification_IMAGE_CLASSIFICATION_H_
#define SUPPORT_SRC_TASK_VISION_ImageClassification_IMAGE_CLASSIFICATION_H_
#include <cmath>
#include <exception>
#include <fstream>
#include <iostream>
#include <limits>
#include <memory>
#include <numeric>
#include <string>
#include <vector>
#include "opencv2/opencv.hpp"
#include "src/core/engine.h"
#include "src/processor/classification_postprocessor.h"
#include "src/processor/classification_preprocessor.h"
#include "src/task/vision/base_vision_task_api.h"
#include "src/utils/utils.h"
#include "task/vision/image_classification_types.h"
class ImageClassification

View file

@@ -1,26 +1,12 @@
#include "task/vision/image_classification_task.h"
#include "include/utils/utils.h"
#include "src/task/vision/imageclassification/image_classification.h"
#include "src/utils/utils.h"
#include "image_classification.h"
class ImageClassificationTask::impl {
public:
std::unique_ptr<ImageClassification> imageclassification_;
};
ImageClassificationTask::ImageClassificationTask(
const std::string& config_file_path)
: pimpl_(std::make_unique<impl>()) {
init_flag_ = -1;
pimpl_->imageclassification_ =
std::unique_ptr<ImageClassification>(new ImageClassification());
ImageClassificationOption option;
if (!configToOption(config_file_path, option)) {
init_flag_ = pimpl_->imageclassification_->InitFromOption(option);
}
}
ImageClassificationTask::ImageClassificationTask(
const ImageClassificationOption& option)
: pimpl_(std::make_unique<impl>()) {

View file

@@ -1,12 +1,10 @@
#include "src/task/vision/objectdetection/object_detection.h"
#include "object_detection.h"
#include <chrono>
#include <fstream>
#include <iostream>
#include "src/utils/json.hpp"
#include "src/utils/utils.h"
#include "utils/time.h"
#include "utils/utils.h"
using json = nlohmann::json;
std::vector<std::vector<float>> ObjectDetection::Process(
const cv::Mat &img_raw) {

View file

@@ -1,16 +1,13 @@
#ifndef SUPPORT_SRC_TASK_VISION_OBJECTDETECTION_OBJECT_DETECTION_H_
#define SUPPORT_SRC_TASK_VISION_OBJECTDETECTION_OBJECT_DETECTION_H_
#include <iostream>
#include <string>
#include <vector>
#include "opencv2/opencv.hpp"
#include "src/core/engine.h"
#include "src/processor/detection_postprocessor.h"
#include "src/processor/detection_preprocessor.h"
#include "src/task/vision/base_vision_task_api.h"
#include "src/utils/utils.h"
#include "task/vision/object_detection_types.h"
class ObjectDetection : public BaseVisionTaskApi<ObjectDetectionResult> {

View file

@@ -1,8 +1,6 @@
#include "task/vision/object_detection_task.h"
#include "include/utils/utils.h"
#include "src/task/vision/objectdetection/object_detection.h"
#include "src/utils/utils.h"
#include "object_detection.h"
class ObjectDetectionTask::impl {
public:
@@ -17,17 +15,6 @@ ObjectDetectionTask::ObjectDetectionTask(const ObjectDetectionOption &option)
init_flag_ = pimpl_->objectdetection_->InitFromOption(option);
}
ObjectDetectionTask::ObjectDetectionTask(const std::string &config_file_path)
: pimpl_(std::make_unique<impl>()) {
init_flag_ = -1;
pimpl_->objectdetection_ =
std::unique_ptr<ObjectDetection>(new ObjectDetection());
ObjectDetectionOption option;
if (!configToOption(config_file_path, option)) {
init_flag_ = pimpl_->objectdetection_->InitFromOption(option);
}
}
int ObjectDetectionTask::getInitFlag() { return init_flag_; }
ObjectDetectionResult ObjectDetectionTask::Detect(const cv::Mat &img_raw) {

View file

@@ -1,12 +1,10 @@
#include "src/task/vision/poseestimation/pose_estimation.h"
#include "pose_estimation.h"
#include <chrono>
#include "src/utils/json.hpp"
#include "src/utils/cv2_utils.h" // for: CHW
#include "utils/time.h"
using json = nlohmann::json;
PoseEstimationResult PoseEstimation::Estimate(const cv::Mat &img_raw,
const Boxi &box) {
result_points_.clear();

View file

@@ -1,17 +1,14 @@
#ifndef SUPPORT_SRC_TASK_VISION_POSEESTIMATION_POSE_ESTIMATION_H_
#define SUPPORT_SRC_TASK_VISION_POSEESTIMATION_POSE_ESTIMATION_H_
#include <iostream>
#include <string>
#include <utility> // for pair<>
#include <vector>
#include "opencv2/opencv.hpp"
#include "src/core/engine.h"
#include "src/processor/estimation_postprocessor.h"
#include "src/processor/estimation_preprocessor.h"
#include "src/task/vision/base_vision_task_api.h"
#include "src/utils/cv2_utils.h"
#include "task/vision/pose_estimation_types.h"
class PoseEstimation : public BaseVisionTaskApi<PoseEstimationResult> {

View file

@@ -1,8 +1,6 @@
#include "task/vision/pose_estimation_task.h"
#include "include/utils/utils.h"
#include "src/task/vision/poseestimation/pose_estimation.h"
#include "src/utils/utils.h"
#include "pose_estimation.h"
class PoseEstimationTask::impl {
public:
@@ -17,17 +15,6 @@ PoseEstimationTask::PoseEstimationTask(const PoseEstimationOption &option)
init_flag_ = pimpl_->poseestimation_->InitFromOption(option);
}
PoseEstimationTask::PoseEstimationTask(const std::string &config_file_path)
: pimpl_(std::make_unique<impl>()) {
init_flag_ = -1;
pimpl_->poseestimation_ =
std::unique_ptr<PoseEstimation>(new PoseEstimation());
PoseEstimationOption option;
if (!configToOption(config_file_path, option)) {
init_flag_ = pimpl_->poseestimation_->InitFromOption(option);
}
}
int PoseEstimationTask::getInitFlag() { return init_flag_; }
PoseEstimationResult PoseEstimationTask::Estimate(const cv::Mat &img_raw,

View file

@@ -1,32 +1,24 @@
#ifndef SUPPORT_SRC_UTILS_CV2_UTILS_H_
#define SUPPORT_SRC_UTILS_CV2_UTILS_H_
#include "src/utils/cv2_utils.h"
#include <algorithm>
#include <iostream>
#include <stdexcept>
#include <vector>
#include "cv2_utils.h"
#include "opencv2/opencv.hpp"
enum { CHW = 0, HWC = 1 };
cv::Mat normalize(const cv::Mat &mat, float mean, float scale) {
cv::Mat matf;
if (mat.type() != CV_32FC3)
if (mat.type() != CV_32FC3) {
mat.convertTo(matf, CV_32FC3);
else
} else {
matf = mat; // reference
}
return (matf - mean) * scale;
}
cv::Mat normalize(const cv::Mat &mat, const float *mean, const float *scale) {
cv::Mat mat_copy;
if (mat.type() != CV_32FC3)
if (mat.type() != CV_32FC3) {
mat.convertTo(mat_copy, CV_32FC3);
else
} else {
mat_copy = mat.clone();
}
for (int i = 0; i < mat_copy.rows; ++i) {
cv::Vec3f *p = mat_copy.ptr<cv::Vec3f>(i);
for (int j = 0; j < mat_copy.cols; ++j) {
@@ -43,15 +35,17 @@ void normalize(const cv::Mat &inmat, cv::Mat &outmat, float mean, float scale) {
}
void normalize_inplace(cv::Mat &mat_inplace, float mean, float scale) {
if (mat_inplace.type() != CV_32FC3)
if (mat_inplace.type() != CV_32FC3) {
mat_inplace.convertTo(mat_inplace, CV_32FC3);
}
normalize(mat_inplace, mat_inplace, mean, scale);
}
void normalize_inplace(cv::Mat &mat_inplace, const float *mean,
const float *scale) {
if (mat_inplace.type() != CV_32FC3)
if (mat_inplace.type() != CV_32FC3) {
mat_inplace.convertTo(mat_inplace, CV_32FC3);
}
for (int i = 0; i < mat_inplace.rows; ++i) {
cv::Vec3f *p = mat_inplace.ptr<cv::Vec3f>(i);
for (int j = 0; j < mat_inplace.cols; ++j) {
@@ -64,7 +58,7 @@ void normalize_inplace(cv::Mat &mat_inplace, const float *mean,
cv::Mat GetAffineTransform(float center_x, float center_y, float scale_width,
float scale_height, int output_image_width,
int output_image_height, bool inverse = false) {
int output_image_height, bool inverse) {
// solve the affine transformation matrix
// get the three points corresponding to the source picture and the target
@@ -116,5 +110,3 @@ cv::Mat GetAffineTransform(float center_x, float center_y, float scale_width,
return affineTransform;
}
#endif // SUPPORT_SRC_UTILS_CV2_UTILS_H_

View file

@@ -1,8 +1,7 @@
#ifndef SUPPORT_SRC_UTILS_NMS_UTILS_H_
#define SUPPORT_SRC_UTILS_NMS_UTILS_H_
#include "src/utils/nms_utils.h"
#include "nms_utils.h"
#include <algorithm>
#include <cmath> // for: std::exp
#include <vector>
#include "task/vision/object_detection_types.h"
@@ -147,5 +146,3 @@ void offset_nms(std::vector<Boxf> &input, std::vector<Boxf> &output,
}
}
}
#endif // SUPPORT_SRC_UTILS_NMS_UTILS_H_

View file

@@ -1,16 +1,13 @@
#include "utils/utils.h"
#include "utils.h"
#include <math.h> // for: exp
#include <cmath>
#include <cstdint> // for: uint32_t
#include <fstream> // for ifstream
#include <iostream>
#include <fstream> // for: ifstream
#include <string>
#include <vector>
#include "include/utils/utils.h"
#include "src/utils/json.hpp"
#include "src/utils/utils.h"
using json = nlohmann::json;
#include "utils/utils.h"
std::vector<std::string> readLabels(const std::string& label_file_path) {
std::vector<std::string> labels;
@@ -64,84 +61,3 @@ void resizeUnscale(const cv::Mat& mat, cv::Mat& mat_rs, int target_height,
new_unpad_mat.copyTo(mat_rs(cv::Rect(dw, dh, new_unpad_w, new_unpad_h)));
}
int getConfig(const std::string& config_file_path, json& config) {
std::ifstream f(config_file_path);
try {
config = json::parse(f);
} catch (json::parse_error& ex) {
std::cout << "[ ERROR ] Init fail, parse json config file fail"
<< std::endl;
return 0;
}
return 1;
}
int configToOption(const std::string& config_file_path,
ImageClassificationOption& option) {
json config;
if (!getConfig(config_file_path, config)) {
return -1;
}
std::string model_path = config["model_path"];
option.model_path = model_path;
std::string label_path = config["label_path"];
option.label_path = label_path;
if (config.contains("intra_threads_num")) {
option.intra_threads_num = config["intra_threads_num"];
}
if (config.contains("inter_threads_num")) {
option.inter_threads_num = config["inter_threads_num"];
}
return 0;
}
int configToOption(const std::string& config_file_path,
ObjectDetectionOption& option) {
json config;
if (!getConfig(config_file_path, config)) {
return -1;
}
std::string model_path = config["model_path"];
option.model_path = model_path;
std::string label_path = config["label_path"];
option.label_path = label_path;
if (config.contains("intra_threads_num")) {
option.intra_threads_num = config["intra_threads_num"];
}
if (config.contains("inter_threads_num")) {
option.inter_threads_num = config["inter_threads_num"];
}
if (config.contains("score_threshold")) {
option.score_threshold = config["score_threshold"];
}
if (config.contains("nms_threshold")) {
option.nms_threshold = config["nms_threshold"];
}
if (config.contains("class_name_whitelist")) {
option.class_name_whitelist =
config["class_name_whitelist"].get<std::vector<int>>();
}
if (config.contains("class_name_blacklist")) {
option.class_name_blacklist =
config["class_name_blacklist"].get<std::vector<int>>();
}
return 0;
}
int configToOption(const std::string& config_file_path,
PoseEstimationOption& option) {
json config;
if (!getConfig(config_file_path, config)) {
return -1;
}
std::string model_path = config["model_path"];
option.model_path = model_path;
if (config.contains("intra_threads_num")) {
option.intra_threads_num = config["intra_threads_num"];
}
if (config.contains("inter_threads_num")) {
option.inter_threads_num = config["inter_threads_num"];
}
return 0;
}
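Although the configToOption helpers are removed from utils.cc in this commit, they document the JSON schema behind config files such as yolov6.json. A hypothetical detection config could be generated like this (paths and values are illustrative only):

    import json

    config = {
        "model_path": "models/yolov6p5_n.q.onnx",  # required
        "label_path": "models/coco.txt",           # required
        "intra_threads_num": 2,                    # optional
        "inter_threads_num": 2,                    # optional
        "score_threshold": 0.5,                    # optional
        "nms_threshold": 0.45,                     # optional
        "class_name_whitelist": [0],               # optional, presumably class ids
        "class_name_blacklist": [],                # optional
    }
    with open("yolov6.json", "w") as f:
        json.dump(config, f, indent=2)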

View file

@@ -2,8 +2,7 @@
#define SUPPORT_SRC_UTILS_UTILS_H_
#include <string>
#include "include/utils/utils.h"
#include <vector>
std::vector<std::string> readLabels(const std::string& labelFilepath);

View file

@@ -1 +0,0 @@
add_executable(json_test json_test.cc)

View file

@@ -1,20 +0,0 @@
#include "../src/utils/json.hpp"
#include <fstream>
#include <iostream>
using json = nlohmann::json;
int main(int argc, char *argv[]) {
if (argc < 2) {
std::cout << "Usage: " << argv[0] << " <json_file_path>\n";
return 0;
}
std::ifstream f(argv[1]);
json config = json::parse(f);
if (config.contains("name")) {
std::string name = config["name"];
std::cout << name << std::endl;
}
return 0;
}

View file

@@ -0,0 +1,23 @@
import argparse
def get_argsparser():
"""Parse commandline."""
parser = argparse.ArgumentParser(description="Bianbu AI Python Demo for Image Classification.")
parser.add_argument("--image", "-i", type=str, required=True, help="input test image path")
parser.add_argument("--model", "-m", type=str, required=True, help="input test model(*.onnx) path")
parser.add_argument("--label", "-l", type=str, required=True, help="input test label path")
parser.add_argument("--intra", type=int, default=2, help="intra thread number for backend(e.g. onnxruntime)")
parser.add_argument("--inter", type=int, default=2, help="inter thread number for backend(e.g. onnxruntime)")
return parser.parse_args()
if __name__ == "__main__":
args = get_argsparser()
import cv2
from bianbuai import ImageClassificationOption, ImageClassificationTask
option = ImageClassificationOption(args.model, args.label, args.intra, args.inter)
task = ImageClassificationTask(option)
output = task.Classify(cv2.imread(args.image))
print("Label:", output.label_text)
print("Score:", output.score)

View file

@@ -0,0 +1,27 @@
import argparse
def get_argsparser():
"""Parse commandline."""
parser = argparse.ArgumentParser(description="Bianbu AI Python Demo for Object Detection.")
parser.add_argument("--image", "-i", type=str, required=True, help="input test image path")
parser.add_argument("--model", "-m", type=str, required=True, help="input test model(*.onnx) path")
parser.add_argument("--label", "-l", type=str, required=True, help="input test label path")
parser.add_argument("--intra", type=int, default=2, help="intra thread number for backend(e.g. onnxruntime)")
parser.add_argument("--inter", type=int, default=2, help="inter thread number for backend(e.g. onnxruntime)")
return parser.parse_args()
if __name__ == "__main__":
args = get_argsparser()
import cv2
from bianbuai import ObjectDetectionOption, ObjectDetectionTask
option = ObjectDetectionOption()
option.model_path = args.model
option.label_path = args.label
option.intra_threads_num = args.intra
option.inter_threads_num = args.inter
task = ObjectDetectionTask(option)
outputs = task.Detect(cv2.imread(args.image))
for i, box in enumerate(outputs):
print("bbox[%2d] x1y1x2y2: (%4d,%4d,%4d,%4d), score: %5.3f, label_text: %s" % (i, box.x1, box.y1, box.x2, box.y2, box.score, box.label_text))

View file

@@ -0,0 +1,42 @@
import argparse
def get_argsparser():
"""Parse commandline."""
parser = argparse.ArgumentParser(description="Bianbu AI Python Demo for Human Pose Points.")
parser.add_argument("--image", "-i", type=str, required=True, help="input test image path")
parser.add_argument("--label", "-l", type=str, required=True, help="input test label path")
parser.add_argument("--model-det", "-md", type=str, required=True, help="input detection model(*.onnx) path")
parser.add_argument("--model-pose", "-mp", type=str, required=True, help="input pose model(*.onnx) path")
parser.add_argument("--intra", type=int, default=2, help="intra thread number for backend(e.g. onnxruntime)")
parser.add_argument("--inter", type=int, default=2, help="inter thread number for backend(e.g. onnxruntime)")
return parser.parse_args()
if __name__ == "__main__":
args = get_argsparser()
# object detection
from bianbuai import ObjectDetectionOption, ObjectDetectionTask
option_detect = ObjectDetectionOption()
option_detect.model_path = args.model_det
option_detect.label_path = args.label
option_detect.intra_threads_num = args.intra
option_detect.inter_threads_num = args.inter
task_detect = ObjectDetectionTask(option_detect)
# pose estimation
from bianbuai import PoseEstimationOption, PoseEstimationTask
option_pose = PoseEstimationOption()
option_pose.model_path = args.model_pose
option_pose.intra_threads_num = args.intra
option_pose.inter_threads_num = args.inter
task_pose = PoseEstimationTask(option_pose)
import cv2
image = cv2.imread(args.image)
output_boxes = task_detect.Detect(image)
for i, box in enumerate(output_boxes):
print("bbox[%2d] x1y1x2y2: (%4d,%4d,%4d,%4d), score: %5.3f, label_text: %s" % (i, box.x1, box.y1, box.x2, box.y2, box.score, box.label_text))
pose_points = task_pose.Estimate(image, box)
for i, point in enumerate(pose_points):
print(" point[%2d] xy: (%4d,%4d), score: %5.3f" % (i, point.x, point.y, point.score))

View file

@@ -0,0 +1,307 @@
import os
import platform
import unittest
import cv2
try:
# python wheel package end2end test
import bianbuai
except ImportError:
# local cmake project build output test
import bianbuai_pybind11_state as bianbuai
target_arch = ["AMD64", "x86_64", "riscv64"]
class TestInferenceSession(unittest.TestCase):
def __init__(self, *args, **kwargs):
super(TestInferenceSession, self).__init__(*args, **kwargs)
self.addTypeEqualityFunc(bianbuai.Box, 'assertBoxEqual')
def for_arch(arch=target_arch):
def decorator(func):
def wrapper(self):
if self._arch in arch:
func(self)
else:
self.skipTest(f"Test is only for {arch} platform")
return wrapper
return decorator
def assertBoxEqual(self, first, second, msg=None, delta=None, label_only=False):
"""Assert that two Object Detection Box are equal."""
self.assertIsInstance(first, bianbuai.Box, 'First argument is not a Box')
self.assertIsInstance(second, bianbuai.Box, 'Second argument is not a Box')
diff = ''
if first != second:
if (first.x1 != second.x1 or
first.y1 != second.y1 or
first.x2 != second.x2 or
first.y2 != second.y2) and not label_only:
diff += '%s != %s\n' % ([first.x1, first.y1, first.x2, first.y2],
[second.x1, second.y1, second.x2, second.y2])
if delta and delta < abs(first.score - second.score) and not label_only:
diff += '%s < abs(%s - %s)\n' % (delta, first.score, second.score)
if first.label_text != second.label_text:
diff += '%s != %s\n' % (first.label_text, second.label_text)
if first.label != second.label:
diff += '%s != %s\n' % (first.label, second.label)
standardMsg = self._truncateMessage("", diff)
if diff:
self.fail(self._formatMessage(msg, standardMsg))
def setUp(self):
"Method for setting up the test fixture before exercising it."
self._data = "data"
self._arch = platform.machine()
def tearDown(self):
"Method for deconstructing the test fixture after testing it."
pass
@for_arch(target_arch)
def testTaskImageClassification(self):
"""
Test task image classification
"""
image_path = os.path.join(self._data, "imgs", "dog.jpg")
model_path = os.path.join(self._data, "models", "squeezenet1.1-7.onnx")
label_path = os.path.join(self._data, "models", "synset.txt")
intra_thread_num, inter_thread_num = 2, 2
option = bianbuai.ImageClassificationOption(model_path, label_path, intra_thread_num, inter_thread_num)
task = bianbuai.ImageClassificationTask(option)
result = task.Classify(cv2.imread(image_path))
self.assertEqual(task.getInitFlag(), 0)
self.assertEqual(result.label_text, "n02113023 Pembroke, Pembroke Welsh corgi")
self.assertEqual(result.label_index, 263)
self.assertAlmostEqual(result.score, 0.8347830176353455, delta=1e-5)
@for_arch(target_arch)
def testTaskObjectDetectionImage(self):
"""
Test task object detection with input image
"""
image_path = os.path.join(self._data, "imgs", "person.jpg")
option = bianbuai.ObjectDetectionOption()
option.model_path = os.path.join(self._data, "models", "yolov6p5_n.q.onnx")
option.label_path = os.path.join(self._data, "models", "coco.txt")
option.intra_threads_num, option.inter_threads_num = 2, 2
task = bianbuai.ObjectDetectionTask(option)
self.assertEqual(task.getInitFlag(), 0)
output_a = task.Detect(cv2.imread(image_path))
# TODO: add expected outputs to support testing on other platforms (e.g. Windows)
output_e = [
bianbuai.Box(1349, 408, 1513, 790, 0.673, "person", 0),
bianbuai.Box( 581, 439, 682, 704, 0.565, "person", 0) if self._arch != "riscv64" else bianbuai.Box( 581, 439, 677, 709, 0.565, "person", 0),
bianbuai.Box(1476, 421, 1592, 783, 0.522, "person", 0) if self._arch != "riscv64" else bianbuai.Box(1476, 418, 1594, 783, 0.522, "person", 0),
bianbuai.Box( 462, 439, 557, 711, 0.673, "person", 0) if self._arch != "riscv64" else bianbuai.Box( 462, 441, 559, 711, 0.522, "person", 0),
]
self.assertEqual(len(output_a), len(output_e))
for i in range(len(output_a)):
self.assertBoxEqual(output_a[i], output_e[i], delta=1e-3, label_only=(self._arch == "AMD64"))
@for_arch(target_arch)
def testTaskPoseEstimationImage(self):
"""
Test task pose estimation with input image
"""
image_path = os.path.join(self._data, "imgs", "person.jpg")
# object detection
option_1 = bianbuai.ObjectDetectionOption()
option_1.model_path = os.path.join(self._data, "models", "yolov6p5_n.q.onnx")
option_1.label_path = os.path.join(self._data, "models", "coco.txt")
option_1.intra_threads_num, option_1.inter_threads_num = 2, 2
task_1 = bianbuai.ObjectDetectionTask(option_1)
self.assertEqual(task_1.getInitFlag(), 0)
output_1 = task_1.Detect(cv2.imread(image_path))
# pose estimation
option_2 = bianbuai.PoseEstimationOption()
option_2.model_path = os.path.join(self._data, "models", "rtmpose-t.q.onnx")
option_2.intra_threads_num, option_2.inter_threads_num = 2, 2
task_2 = bianbuai.PoseEstimationTask(option_2)
self.assertEqual(task_2.getInitFlag(), 0)
output_2 = [
task_2.Estimate(cv2.imread(image_path), box) for box in output_1
]
# TODO: add expected outputs to support testing on other platforms (e.g. Windows)
output_e = [
[
bianbuai.PosePoint(1417, 461, 0.245499),
bianbuai.PosePoint(1444, 451, 0.355974),
bianbuai.PosePoint(1403, 438, 0.368249),
bianbuai.PosePoint(1382, 447, 0.306874),
bianbuai.PosePoint(1380, 451, 0.306874),
bianbuai.PosePoint(1421, 486, 0.257774),
bianbuai.PosePoint(1384, 509, 0.282324),
bianbuai.PosePoint(1492, 562, 0.147300),
bianbuai.PosePoint(1384, 564, 0.152968),
bianbuai.PosePoint(1392, 680, 0.098200),
bianbuai.PosePoint(1382, 670, 0.159575),
bianbuai.PosePoint(1440, 602, 0.191210),
bianbuai.PosePoint(1421, 602, 0.152968),
bianbuai.PosePoint(1452, 608, 0.140221),
bianbuai.PosePoint(1448, 684, 0.171850),
bianbuai.PosePoint(1473, 763, 0.355974),
bianbuai.PosePoint(1473, 765, 0.319149),
],
[
bianbuai.PosePoint( 596, 446, 0.484399),
bianbuai.PosePoint( 598, 447, 0.503274),
bianbuai.PosePoint( 592, 449, 0.515549),
bianbuai.PosePoint( 609, 451, 0.552374),
bianbuai.PosePoint( 592, 443, 0.589199),
bianbuai.PosePoint( 617, 466, 0.478724),
bianbuai.PosePoint( 593, 467, 0.540099),
bianbuai.PosePoint( 640, 505, 0.368249),
bianbuai.PosePoint( 596, 503, 0.380524),
bianbuai.PosePoint( 605, 521, 0.233224),
bianbuai.PosePoint( 605, 524, 0.282324),
bianbuai.PosePoint( 624, 546, 0.380524),
bianbuai.PosePoint( 614, 541, 0.380524),
bianbuai.PosePoint( 648, 601, 0.441899),
bianbuai.PosePoint( 622, 601, 0.380524),
bianbuai.PosePoint( 641, 664, 0.601474),
bianbuai.PosePoint( 635, 664, 0.552374),
],
[
bianbuai.PosePoint(1532, 458, 0.233224),
bianbuai.PosePoint(1557, 455, 0.233224),
bianbuai.PosePoint(1506, 446, 0.343699),
bianbuai.PosePoint(1478, 431, 0.208674),
bianbuai.PosePoint(1473, 440, 0.245499),
bianbuai.PosePoint(1513, 495, 0.110475),
bianbuai.PosePoint(1499, 491, 0.171850),
bianbuai.PosePoint(1574, 583, 0.073650),
bianbuai.PosePoint(1423, 590, 0.085925),
bianbuai.PosePoint(1565, 835, 0.061375),
bianbuai.PosePoint(1460, 677, 0.089231),
bianbuai.PosePoint(1521, 589, 0.089231),
bianbuai.PosePoint(1504, 596, 0.101979),
bianbuai.PosePoint(1556, 607, 0.135025),
bianbuai.PosePoint(1488, 609, 0.135025),
bianbuai.PosePoint(1583, 768, 0.331424),
bianbuai.PosePoint(1576, 756, 0.306874),
],
[
bianbuai.PosePoint( 493, 475, 0.687398),
bianbuai.PosePoint( 492, 470, 0.724223),
bianbuai.PosePoint( 484, 471, 0.736498),
bianbuai.PosePoint( 499, 471, 0.724223),
bianbuai.PosePoint( 477, 475, 0.773323),
bianbuai.PosePoint( 514, 497, 0.724223),
bianbuai.PosePoint( 468, 505, 0.711948),
bianbuai.PosePoint( 532, 528, 0.589199),
bianbuai.PosePoint( 459, 543, 0.589199),
bianbuai.PosePoint( 536, 537, 0.331431),
bianbuai.PosePoint( 468, 537, 0.343699),
bianbuai.PosePoint( 518, 572, 0.527824),
bianbuai.PosePoint( 489, 579, 0.497146),
bianbuai.PosePoint( 528, 637, 0.564649),
bianbuai.PosePoint( 496, 637, 0.626023),
bianbuai.PosePoint( 536, 689, 0.589199),
bianbuai.PosePoint( 500, 697, 0.687398),
],
] if self._arch == "x86_64" else [
[
bianbuai.PosePoint(1409, 562, 0.306874),
bianbuai.PosePoint(1415, 457, 0.282324),
bianbuai.PosePoint(1401, 550, 0.319149),
bianbuai.PosePoint(1448, 475, 0.319149),
bianbuai.PosePoint(1382, 473, 0.280441),
bianbuai.PosePoint(1456, 502, 0.140221),
bianbuai.PosePoint(1390, 502, 0.147300),
bianbuai.PosePoint(1527, 525, 0.101979),
bianbuai.PosePoint(1359, 573, 0.127473),
bianbuai.PosePoint(1492, 351, 0.073650),
bianbuai.PosePoint(1374, 351, 0.114726),
bianbuai.PosePoint(1415, 428, 0.127473),
bianbuai.PosePoint(1423, 444, 0.135025),
bianbuai.PosePoint(1450, 616, 0.184125),
bianbuai.PosePoint(1398, 639, 0.184125),
bianbuai.PosePoint(1522, 722, 0.355974),
bianbuai.PosePoint(1481, 728, 0.294599),
],
[
bianbuai.PosePoint( 611, 502, 0.257774),
bianbuai.PosePoint( 615, 491, 0.405074),
bianbuai.PosePoint( 605, 496, 0.355974),
bianbuai.PosePoint( 605, 490, 0.380524),
bianbuai.PosePoint( 587, 491, 0.319149),
bianbuai.PosePoint( 623, 470, 0.208674),
bianbuai.PosePoint( 594, 464, 0.159575),
bianbuai.PosePoint( 605, 398, 0.135025),
bianbuai.PosePoint( 590, 398, 0.127473),
bianbuai.PosePoint( 603, 453, 0.135025),
bianbuai.PosePoint( 598, 398, 0.159575),
bianbuai.PosePoint( 631, 576, 0.280441),
bianbuai.PosePoint( 619, 572, 0.257774),
bianbuai.PosePoint( 650, 612, 0.254947),
bianbuai.PosePoint( 600, 615, 0.196400),
bianbuai.PosePoint( 645, 669, 0.454174),
bianbuai.PosePoint( 633, 668, 0.392799),
],
[
bianbuai.PosePoint(1514, 551, 0.220949),
bianbuai.PosePoint(1512, 566, 0.245499),
bianbuai.PosePoint(1510, 431, 0.233224),
bianbuai.PosePoint(1488, 520, 0.245499),
bianbuai.PosePoint(1501, 540, 0.220949),
bianbuai.PosePoint(1584, 516, 0.147300),
bianbuai.PosePoint(1468, 494, 0.159575),
bianbuai.PosePoint(1579, 572, 0.089231),
bianbuai.PosePoint(1464, 609, 0.101979),
bianbuai.PosePoint(1560, 746, 0.063737),
bianbuai.PosePoint(1464, 638, 0.101979),
bianbuai.PosePoint(1522, 834, 0.147300),
bianbuai.PosePoint(1514, 834, 0.135025),
bianbuai.PosePoint(1551, 834, 0.110475),
bianbuai.PosePoint(1490, 614, 0.122750),
bianbuai.PosePoint(1581, 834, 0.306874),
bianbuai.PosePoint(1573, 834, 0.319149),
],
[
bianbuai.PosePoint( 484, 478, 0.478724),
bianbuai.PosePoint( 490, 477, 0.540099),
bianbuai.PosePoint( 481, 480, 0.503274),
bianbuai.PosePoint( 494, 477, 0.576924),
bianbuai.PosePoint( 474, 480, 0.552374),
bianbuai.PosePoint( 501, 506, 0.478724),
bianbuai.PosePoint( 470, 508, 0.503274),
bianbuai.PosePoint( 522, 532, 0.306874),
bianbuai.PosePoint( 468, 541, 0.368249),
bianbuai.PosePoint( 530, 536, 0.171850),
bianbuai.PosePoint( 486, 562, 0.257774),
bianbuai.PosePoint( 511, 567, 0.380524),
bianbuai.PosePoint( 496, 569, 0.343699),
bianbuai.PosePoint( 522, 612, 0.319149),
bianbuai.PosePoint( 492, 619, 0.343699),
bianbuai.PosePoint( 519, 679, 0.490999),
bianbuai.PosePoint( 511, 685, 0.466449),
],
]
self.assertEqual(len(output_2), len(output_e))
"""
for pose_points in output_2:
for pp in pose_points:
print("(%4d,%4d,%9.6f)," % (pp.x, pp.y, pp.score))
"""
if self._arch == "AMD64":
print("Skip pose points check since expected outputs are not provided.")
return
for actual, expect in zip(output_2, output_e):
for actual_pp, expect_pp in zip(actual, expect):
self.assertEqual(actual_pp.x, expect_pp.x)
self.assertEqual(actual_pp.y, expect_pp.y)
self.assertAlmostEqual(actual_pp.score, expect_pp.score, delta=1e-5)
if __name__ == "__main__":
unittest.main(verbosity=1)
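Since setUp resolves assets from the relative data directory, the suite should be launched from a working directory that provides data/imgs and data/models; running the file directly (e.g. python3 test_vision_tasks.py, a hypothetical name for this file) invokes unittest.main as above.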