Refactor project structure

This commit is contained in:
2026-03-28 19:56:39 +11:00
parent 1d267378b2
commit 8a2e721058
511 changed files with 59 additions and 48 deletions

View File

@@ -0,0 +1,15 @@
#pragma once
#include <gflags/gflags.h>
DECLARE_string(input);
DECLARE_string(type);
DECLARE_string(output_dir);
DECLARE_string(det_model_dir);
DECLARE_string(cls_model_dir);
DECLARE_string(rec_model_dir);
DECLARE_string(lay_model_dir);
DECLARE_string(tab_model_dir);
DECLARE_string(label_dir);
DECLARE_string(layout_dict_dir);
DECLARE_string(table_dict_dir);

View File

@@ -0,0 +1,425 @@
/*******************************************************************************
* *
* Author : Angus Johnson *
* Version : 6.4.2 *
* Date : 27 February 2017 *
* Website : http://www.angusj.com *
* Copyright : Angus Johnson 2010-2017 *
* *
* License: *
* Use, modification & distribution is subject to Boost Software License Ver 1. *
* http://www.boost.org/LICENSE_1_0.txt *
* *
* Attributions: *
* The code in this library is an extension of Bala Vatti's clipping algorithm: *
* "A generic solution to polygon clipping" *
* Communications of the ACM, Vol 35, Issue 7 (July 1992) pp 56-63. *
* http://portal.acm.org/citation.cfm?id=129906 *
* *
* Computer graphics and geometric modeling: implementation and algorithms *
* By Max K. Agoston *
* Springer; 1 edition (January 4, 2005) *
* http://books.google.com/books?q=vatti+clipping+agoston *
* *
* See also: *
* "Polygon Offsetting by Computing Winding Numbers" *
* Paper no. DETC2005-85513 pp. 565-575 *
* ASME 2005 International Design Engineering Technical Conferences *
* and Computers and Information in Engineering Conference (IDETC/CIE2005) *
* September 24-28, 2005 , Long Beach, California, USA *
* http://www.me.berkeley.edu/~mcmains/pubs/DAC05OffsetPolygon.pdf *
* *
*******************************************************************************/
#pragma once
#ifndef clipper_hpp
#define clipper_hpp
#define CLIPPER_VERSION "6.4.2"
// use_int32: When enabled 32bit ints are used instead of 64bit ints. This
// improve performance but coordinate values are limited to the range +/- 46340
//#define use_int32
// use_xyz: adds a Z member to IntPoint. Adds a minor cost to perfomance.
//#define use_xyz
// use_lines: Enables line clipping. Adds a very minor cost to performance.
#define use_lines
// use_deprecated: Enables temporary support for the obsolete functions
//#define use_deprecated
#include <cstdlib>
#include <cstring>
#include <functional>
#include <list>
#include <ostream>
#include <queue>
#include <set>
#include <stdexcept>
#include <vector>
namespace ClipperLib {
enum ClipType { ctIntersection, ctUnion, ctDifference, ctXor };
enum PolyType { ptSubject, ptClip };
// By far the most widely used winding rules for polygon filling are
// EvenOdd & NonZero (GDI, GDI+, XLib, OpenGL, Cairo, AGG, Quartz, SVG, Gr32)
// Others rules include Positive, Negative and ABS_GTR_EQ_TWO (only in OpenGL)
// see http://glprogramming.com/red/chapter11.html
enum PolyFillType { pftEvenOdd, pftNonZero, pftPositive, pftNegative };
#ifdef use_int32
typedef int cInt;
static cInt const loRange = 0x7FFF;
static cInt const hiRange = 0x7FFF;
#else
typedef signed long long cInt;
static cInt const loRange = 0x3FFFFFFF;
static cInt const hiRange = 0x3FFFFFFFFFFFFFFFLL;
typedef signed long long long64; // used by Int128 class
typedef unsigned long long ulong64;
#endif
struct IntPoint {
cInt X;
cInt Y;
#ifdef use_xyz
cInt Z;
IntPoint(cInt x = 0, cInt y = 0, cInt z = 0) : X(x), Y(y), Z(z){};
#else
IntPoint(cInt x = 0, cInt y = 0) : X(x), Y(y){};
#endif
friend inline bool operator==(const IntPoint &a, const IntPoint &b) {
return a.X == b.X && a.Y == b.Y;
}
friend inline bool operator!=(const IntPoint &a, const IntPoint &b) {
return a.X != b.X || a.Y != b.Y;
}
};
//------------------------------------------------------------------------------
typedef std::vector<IntPoint> Path;
typedef std::vector<Path> Paths;
inline Path &operator<<(Path &poly, const IntPoint &p) {
poly.push_back(p);
return poly;
}
inline Paths &operator<<(Paths &polys, const Path &p) {
polys.push_back(p);
return polys;
}
std::ostream &operator<<(std::ostream &s, const IntPoint &p);
std::ostream &operator<<(std::ostream &s, const Path &p);
std::ostream &operator<<(std::ostream &s, const Paths &p);
struct DoublePoint {
double X;
double Y;
DoublePoint(double x = 0, double y = 0) : X(x), Y(y) {}
DoublePoint(IntPoint ip) : X((double)ip.X), Y((double)ip.Y) {}
};
//------------------------------------------------------------------------------
#ifdef use_xyz
typedef void (*ZFillCallback)(IntPoint &e1bot, IntPoint &e1top, IntPoint &e2bot,
IntPoint &e2top, IntPoint &pt);
#endif
enum InitOptions {
ioReverseSolution = 1,
ioStrictlySimple = 2,
ioPreserveCollinear = 4
};
enum JoinType { jtSquare, jtRound, jtMiter };
enum EndType {
etClosedPolygon,
etClosedLine,
etOpenButt,
etOpenSquare,
etOpenRound
};
class PolyNode;
typedef std::vector<PolyNode *> PolyNodes;
class PolyNode {
public:
PolyNode();
virtual ~PolyNode(){};
Path Contour;
PolyNodes Childs;
PolyNode *Parent;
PolyNode *GetNext() const;
bool IsHole() const;
bool IsOpen() const;
int ChildCount() const;
private:
// PolyNode& operator =(PolyNode& other);
unsigned Index; // node index in Parent.Childs
bool m_IsOpen;
JoinType m_jointype;
EndType m_endtype;
PolyNode *GetNextSiblingUp() const;
void AddChild(PolyNode &child);
friend class Clipper; // to access Index
friend class ClipperOffset;
};
class PolyTree : public PolyNode {
public:
~PolyTree() { Clear(); };
PolyNode *GetFirst() const;
void Clear();
int Total() const;
private:
// PolyTree& operator =(PolyTree& other);
PolyNodes AllNodes;
friend class Clipper; // to access AllNodes
};
bool Orientation(const Path &poly);
double Area(const Path &poly);
int PointInPolygon(const IntPoint &pt, const Path &path);
void SimplifyPolygon(const Path &in_poly, Paths &out_polys,
PolyFillType fillType = pftEvenOdd);
void SimplifyPolygons(const Paths &in_polys, Paths &out_polys,
PolyFillType fillType = pftEvenOdd);
void SimplifyPolygons(Paths &polys, PolyFillType fillType = pftEvenOdd);
void CleanPolygon(const Path &in_poly, Path &out_poly, double distance = 1.415);
void CleanPolygon(Path &poly, double distance = 1.415);
void CleanPolygons(const Paths &in_polys, Paths &out_polys,
double distance = 1.415);
void CleanPolygons(Paths &polys, double distance = 1.415);
void MinkowskiSum(const Path &pattern, const Path &path, Paths &solution,
bool pathIsClosed);
void MinkowskiSum(const Path &pattern, const Paths &paths, Paths &solution,
bool pathIsClosed);
void MinkowskiDiff(const Path &poly1, const Path &poly2, Paths &solution);
void PolyTreeToPaths(const PolyTree &polytree, Paths &paths);
void ClosedPathsFromPolyTree(const PolyTree &polytree, Paths &paths);
void OpenPathsFromPolyTree(PolyTree &polytree, Paths &paths);
void ReversePath(Path &p);
void ReversePaths(Paths &p);
struct IntRect {
cInt left;
cInt top;
cInt right;
cInt bottom;
};
// enums that are used internally ...
enum EdgeSide { esLeft = 1, esRight = 2 };
// forward declarations (for stuff used internally) ...
struct TEdge;
struct IntersectNode;
struct LocalMinimum;
struct OutPt;
struct OutRec;
struct Join;
typedef std::vector<OutRec *> PolyOutList;
typedef std::vector<TEdge *> EdgeList;
typedef std::vector<Join *> JoinList;
typedef std::vector<IntersectNode *> IntersectList;
//------------------------------------------------------------------------------
// ClipperBase is the ancestor to the Clipper class. It should not be
// instantiated directly. This class simply abstracts the conversion of sets of
// polygon coordinates into edge objects that are stored in a LocalMinima list.
class ClipperBase {
public:
ClipperBase();
virtual ~ClipperBase();
virtual bool AddPath(const Path &pg, PolyType PolyTyp, bool Closed);
bool AddPaths(const Paths &ppg, PolyType PolyTyp, bool Closed);
virtual void Clear();
IntRect GetBounds();
bool PreserveCollinear() { return m_PreserveCollinear; };
void PreserveCollinear(bool value) { m_PreserveCollinear = value; };
protected:
void DisposeLocalMinimaList();
TEdge *AddBoundsToLML(TEdge *e, bool IsClosed);
virtual void Reset();
TEdge *ProcessBound(TEdge *E, bool IsClockwise);
void InsertScanbeam(const cInt Y);
bool PopScanbeam(cInt &Y);
bool LocalMinimaPending();
bool PopLocalMinima(cInt Y, const LocalMinimum *&locMin);
OutRec *CreateOutRec();
void DisposeAllOutRecs();
void DisposeOutRec(PolyOutList::size_type index);
void SwapPositionsInAEL(TEdge *edge1, TEdge *edge2);
void DeleteFromAEL(TEdge *e);
void UpdateEdgeIntoAEL(TEdge *&e);
typedef std::vector<LocalMinimum> MinimaList;
MinimaList::iterator m_CurrentLM;
MinimaList m_MinimaList;
bool m_UseFullRange;
EdgeList m_edges;
bool m_PreserveCollinear;
bool m_HasOpenPaths;
PolyOutList m_PolyOuts;
TEdge *m_ActiveEdges;
typedef std::priority_queue<cInt> ScanbeamList;
ScanbeamList m_Scanbeam;
};
//------------------------------------------------------------------------------
class Clipper : public virtual ClipperBase {
public:
Clipper(int initOptions = 0);
bool Execute(ClipType clipType, Paths &solution,
PolyFillType fillType = pftEvenOdd);
bool Execute(ClipType clipType, Paths &solution, PolyFillType subjFillType,
PolyFillType clipFillType);
bool Execute(ClipType clipType, PolyTree &polytree,
PolyFillType fillType = pftEvenOdd);
bool Execute(ClipType clipType, PolyTree &polytree, PolyFillType subjFillType,
PolyFillType clipFillType);
bool ReverseSolution() { return m_ReverseOutput; };
void ReverseSolution(bool value) { m_ReverseOutput = value; };
bool StrictlySimple() { return m_StrictSimple; };
void StrictlySimple(bool value) { m_StrictSimple = value; };
// set the callback function for z value filling on intersections (otherwise Z
// is 0)
#ifdef use_xyz
void ZFillFunction(ZFillCallback zFillFunc);
#endif
protected:
virtual bool ExecuteInternal();
private:
JoinList m_Joins;
JoinList m_GhostJoins;
IntersectList m_IntersectList;
ClipType m_ClipType;
typedef std::list<cInt> MaximaList;
MaximaList m_Maxima;
TEdge *m_SortedEdges;
bool m_ExecuteLocked;
PolyFillType m_ClipFillType;
PolyFillType m_SubjFillType;
bool m_ReverseOutput;
bool m_UsingPolyTree;
bool m_StrictSimple;
#ifdef use_xyz
ZFillCallback m_ZFill; // custom callback
#endif
void SetWindingCount(TEdge &edge);
bool IsEvenOddFillType(const TEdge &edge) const;
bool IsEvenOddAltFillType(const TEdge &edge) const;
void InsertLocalMinimaIntoAEL(const cInt botY);
void InsertEdgeIntoAEL(TEdge *edge, TEdge *startEdge);
void AddEdgeToSEL(TEdge *edge);
bool PopEdgeFromSEL(TEdge *&edge);
void CopyAELToSEL();
void DeleteFromSEL(TEdge *e);
void SwapPositionsInSEL(TEdge *edge1, TEdge *edge2);
bool IsContributing(const TEdge &edge) const;
bool IsTopHorz(const cInt XPos);
void DoMaxima(TEdge *e);
void ProcessHorizontals();
void ProcessHorizontal(TEdge *horzEdge);
void AddLocalMaxPoly(TEdge *e1, TEdge *e2, const IntPoint &pt);
OutPt *AddLocalMinPoly(TEdge *e1, TEdge *e2, const IntPoint &pt);
OutRec *GetOutRec(int idx);
void AppendPolygon(TEdge *e1, TEdge *e2);
void IntersectEdges(TEdge *e1, TEdge *e2, IntPoint &pt);
OutPt *AddOutPt(TEdge *e, const IntPoint &pt);
OutPt *GetLastOutPt(TEdge *e);
bool ProcessIntersections(const cInt topY);
void BuildIntersectList(const cInt topY);
void ProcessIntersectList();
void ProcessEdgesAtTopOfScanbeam(const cInt topY);
void BuildResult(Paths &polys);
void BuildResult2(PolyTree &polytree);
void SetHoleState(TEdge *e, OutRec *outrec);
void DisposeIntersectNodes();
bool FixupIntersectionOrder();
void FixupOutPolygon(OutRec &outrec);
void FixupOutPolyline(OutRec &outrec);
bool IsHole(TEdge *e);
bool FindOwnerFromSplitRecs(OutRec &outRec, OutRec *&currOrfl);
void FixHoleLinkage(OutRec &outrec);
void AddJoin(OutPt *op1, OutPt *op2, const IntPoint offPt);
void ClearJoins();
void ClearGhostJoins();
void AddGhostJoin(OutPt *op, const IntPoint offPt);
bool JoinPoints(Join *j, OutRec *outRec1, OutRec *outRec2);
void JoinCommonEdges();
void DoSimplePolygons();
void FixupFirstLefts1(OutRec *OldOutRec, OutRec *NewOutRec);
void FixupFirstLefts2(OutRec *InnerOutRec, OutRec *OuterOutRec);
void FixupFirstLefts3(OutRec *OldOutRec, OutRec *NewOutRec);
#ifdef use_xyz
void SetZ(IntPoint &pt, TEdge &e1, TEdge &e2);
#endif
};
//------------------------------------------------------------------------------
class ClipperOffset {
public:
ClipperOffset(double miterLimit = 2.0, double roundPrecision = 0.25);
~ClipperOffset();
void AddPath(const Path &path, JoinType joinType, EndType endType);
void AddPaths(const Paths &paths, JoinType joinType, EndType endType);
void Execute(Paths &solution, double delta);
void Execute(PolyTree &solution, double delta);
void Clear();
double MiterLimit;
double ArcTolerance;
private:
Paths m_destPolys;
Path m_srcPoly;
Path m_destPoly;
std::vector<DoublePoint> m_normals;
double m_delta, m_sinA, m_sin, m_cos;
double m_miterLim, m_StepsPerRad;
IntPoint m_lowest;
PolyNode m_polyNodes;
void FixOrientations();
void DoOffset(double delta);
void OffsetPoint(int j, int &k, JoinType jointype);
void DoSquare(int j, int k);
void DoMiter(int j, int k, double r);
void DoRound(int j, int k);
};
//------------------------------------------------------------------------------
class clipperException : public std::exception {
public:
clipperException(const char *description) : m_descr(description) {}
virtual ~clipperException() throw() {}
virtual const char *what() const throw() { return m_descr.c_str(); }
private:
std::string m_descr;
};
//------------------------------------------------------------------------------
} // ClipperLib namespace
#endif // clipper_hpp

View File

@@ -0,0 +1,45 @@
#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
#include <chrono>
#include <iomanip>
#include <iostream>
#include <ostream>
#include <vector>
#include <cstring>
#include <fstream>
#include <numeric>
#include <include/preprocess_op.h>
#include <include/postprocess_op.h>
#include <openvino/openvino.hpp>
#include <openvino/core/preprocess/pre_post_process.hpp>
namespace PaddleOCR {
class Classifier
{
public:
explicit Classifier(std::string model_path);
void Run(std::vector<cv::Mat> img_list, std::vector<OCRPredictResult>& ocr_results);
void SetParameters(int cls_batch_num, double cls_thresh);
void GetParameters(int& cls_batch_num, double& cls_thresh);
private:
ov::InferRequest infer_request;
std::string model_path;
std::shared_ptr<ov::Model> model;
ov::CompiledModel compiled_model;
std::recursive_mutex _mutex;
double e = 1.0 / 255.0;
std::vector<float> mean_ = { 0.5f, 0.5f, 0.5f };
std::vector<float> scale_ = { 0.5f, 0.5f, 0.5f };
int cls_batch_num_ = 1;
double cls_thresh = 0.9;
std::vector<size_t> cls_image_shape = { 3, 48, 192 };
std::string GetOpenVINODevice();
// resize
ClsResizeImg resize_op_;
};
}

View File

@@ -0,0 +1,71 @@
#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
#include <chrono>
#include <iomanip>
#include <iostream>
#include <ostream>
#include <vector>
#include <cstring>
#include <fstream>
#include <numeric>
#include <include/paddleocr_utility.h>
#include <include/preprocess_op.h>
#include <include/postprocess_op.h>
#include <openvino/openvino.hpp>
namespace PaddleOCR {
class Detector
{
public:
explicit Detector(std::string model_path);
void Run(const cv::Mat& src_img, std::vector<OCRPredictResult>& ocr_results);
void SetParameters(std::string limit_type,
std::string det_db_score_mode,
bool is_scale,
double det_db_thresh,
double det_db_box_thresh,
double det_db_unclip_ratio,
bool use_dilation);
void GetParameters(std::string& limit_type,
std::string& det_db_score_mode,
bool& is_scale,
double& det_db_thresh,
double& det_db_box_thresh,
double& det_db_unclip_ratio,
bool& use_dilation);
private:
ov::InferRequest infer_request;
std::string model_path;
cv::Mat src_img;
std::shared_ptr<ov::Model> model;
ov::CompiledModel compiled_model;
std::recursive_mutex _mutex;
float ratio_h{};
float ratio_w{};
std::vector<float> mean_ = { 0.485f, 0.456f, 0.406f };
std::vector<float> scale_ = { 1 / 0.229f, 1 / 0.224f, 1 / 0.225f };
cv::Mat resize_img;
double e = 1.0 / 255.0;
std::string limit_type_ = "max";
std::string det_db_score_mode_ = "slow";
int limit_side_len_ = 960;
bool is_scale_ = true;
double det_db_thresh_ = 0.3;
double det_db_box_thresh_ = 0.6;
double det_db_unclip_ratio_ = 1.5;
bool use_dilation_ = false;
// pre-process
ResizeImgType0 resize_op_;
Normalize normalize_op_;
Permute permute_op_;
// post-process
DBPostProcessor post_processor_;
};
}

View File

@@ -0,0 +1,46 @@
#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
#include <chrono>
#include <iomanip>
#include <iostream>
#include <ostream>
#include <vector>
#include <cstring>
#include <fstream>
#include <numeric>
#include <include/paddleocr_utility.h>
#include <include/preprocess_op.h>
#include <include/postprocess_op.h>
#include <openvino/openvino.hpp>
namespace PaddleOCR {
class Recognizer
{
public:
explicit Recognizer(std::string model_path, const std::string& label_path);
void Run(const std::vector<cv::Mat>& img_list, std::vector<OCRPredictResult>& ocr_results);
void SetParameters(int rec_batch_num);
void GetParameters(int& rec_batch_num);
private:
ov::InferRequest infer_request;
std::string model_path;
std::shared_ptr<ov::Model> model;
ov::CompiledModel compiled_model;
std::recursive_mutex _mutex;
std::vector<float> mean_ = { 0.5f, 0.5f, 0.5f };
std::vector<float> scale_ = { 1 / 0.5f, 1 / 0.5f, 1 / 0.5f };
bool is_scale_ = true;
std::vector<std::string> label_list_;
int rec_img_h_ = 48;
int rec_img_w_ = 320;
std::vector<int> rec_image_shape_ = { 3, rec_img_h_, rec_img_w_ };
int rec_batch_num_ = 1;
CrnnResizeImg resize_op_;
Normalize normalize_op_;
PermuteBatch permute_op_;
};
}

View File

@@ -0,0 +1,68 @@
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <include/ocr_cls.h>
#include <include/ocr_det.h>
#include <include/ocr_rec.h>
namespace PaddleOCR {
class PPOCR {
public:
explicit PPOCR();
~PPOCR();
std::vector<OCRPredictResult> ocr(const cv::Mat& img);
bool Initialize(std::string detectionModelDir, std::string classifierModelDir, std::string recognizerModelDir, std::string labelDir);
void SetParameters(std::string limit_type,
std::string det_db_score_mode,
bool is_scale,
double det_db_thresh,
double det_db_box_thresh,
double det_db_unclip_ratio,
bool use_dilation,
int cls_batch_num,
double cls_thresh,
int rec_batch_num);
void GetParameters(std::string& limit_type,
std::string& det_db_score_mode,
bool& is_scale,
double& det_db_thresh,
double& det_db_box_thresh,
double& det_db_unclip_ratio,
bool& use_dilation,
int& cls_batch_num,
double& cls_thresh,
int& rec_batch_num);
protected:
std::unique_ptr<Detector> detector_ = nullptr;
std::unique_ptr<Classifier> classifier_ = nullptr;
std::unique_ptr<Recognizer> recognizer_ = nullptr;
std::recursive_mutex _mutex;
std::string _limit_type;
std::string _det_db_score_mode;
bool _is_scale;
double _det_db_thresh;
double _det_db_box_thresh;
double _det_db_unclip_ratio;
bool _use_dilation;
int _cls_batch_num;
double _cls_thresh;
int _rec_batch_num;
};
} // namespace PaddleOCR

View File

@@ -0,0 +1,110 @@
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <chrono>
#include <iomanip>
#include <iostream>
#include <ostream>
#include <stdlib.h>
#include <vector>
#include <algorithm>
#include <cstring>
#include <fstream>
#include <numeric>
#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
namespace PaddleOCR {
struct OCRPredictResult {
std::vector<std::vector<int>> box;
std::string text;
float score = -1.0;
float cls_score;
int cls_label = -1;
};
struct StructurePredictResult {
std::vector<float> box;
std::vector<std::vector<int>> cell_box;
std::string type;
std::vector<OCRPredictResult> text_res;
std::string html;
float html_score = -1;
float confidence;
};
class Utility {
public:
static std::vector<std::string> ReadDict(const std::string &path);
static void VisualizeBboxes(const cv::Mat &srcimg,
const std::vector<OCRPredictResult> &ocr_result,
const std::string &save_path);
static void VisualizeBboxes(const cv::Mat &srcimg,
const StructurePredictResult &structure_result,
const std::string &save_path);
template <class ForwardIterator>
inline static size_t argmax(ForwardIterator first, ForwardIterator last) {
return std::distance(first, std::max_element(first, last));
}
static void GetAllFiles(const char *dir_name,
std::vector<std::string> &all_inputs);
static cv::Mat GetRotateCropImage(const cv::Mat &srcimage,
std::vector<std::vector<int>> box);
static std::vector<int> argsort(const std::vector<float> &array);
static std::string basename(const std::string &filename);
static bool PathExists(const std::string &path);
static void CreateDir(const std::string &path);
static void print_result(const std::vector<OCRPredictResult> &ocr_result);
static cv::Mat crop_image(cv::Mat &img, const std::vector<int> &area);
static cv::Mat crop_image(cv::Mat &img, const std::vector<float> &area);
static void sorted_boxes(std::vector<OCRPredictResult> &ocr_result);
static std::vector<int> xyxyxyxy2xyxy(std::vector<std::vector<int>> &box);
static std::vector<int> xyxyxyxy2xyxy(std::vector<int> &box);
static float fast_exp(float x);
static std::vector<float>
activation_function_softmax(std::vector<float> &src);
static float iou(std::vector<int> &box1, std::vector<int> &box2);
static float iou(std::vector<float> &box1, std::vector<float> &box2);
private:
static bool comparison_box(const OCRPredictResult &result1,
const OCRPredictResult &result2) {
if (result1.box[0][1] < result2.box[0][1]) {
return true;
} else if (result1.box[0][1] == result2.box[0][1]) {
return result1.box[0][0] < result2.box[0][0];
} else {
return false;
}
}
};
} // namespace PaddleOCR

View File

@@ -0,0 +1,53 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <include/paddleocr.h>
#include <include/structure_layout.h>
#include <include/structure_table.h>
namespace PaddleOCR {
class PaddleStructure : public PPOCR {
public:
explicit PaddleStructure();
~PaddleStructure();
bool Initialize(std::string layModelDir, std::string layModelDic, std::string tabModelDir, std::string tabModelDic);
std::vector<StructurePredictResult> structure(cv::Mat img);
private:
Layout *layout_model_ = nullptr;
Table *table_model_ = nullptr;
std::string rebuild_table(std::vector<std::string> rec_html_tags,
std::vector<std::vector<int>> rec_boxes,
std::vector<OCRPredictResult> &ocr_result);
float dis(std::vector<int> &box1, std::vector<int> &box2);
static bool comparison_dis(const std::vector<float> &dis1,
const std::vector<float> &dis2) {
if (dis1[1] < dis2[1]) {
return true;
} else if (dis1[1] == dis2[1]) {
return dis1[0] < dis2[0];
} else {
return false;
}
}
};
} // namespace PaddleOCR

View File

@@ -0,0 +1,119 @@
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "include/clipper.h"
#include "include/paddleocr_utility.h"
#include <openvino/openvino.hpp>
namespace PaddleOCR {
class DBPostProcessor {
public:
void GetContourArea(const std::vector<std::vector<float>> &box,
float unclip_ratio, float &distance);
cv::RotatedRect UnClip(std::vector<std::vector<float>> box,
const float &unclip_ratio);
float **Mat2Vec(cv::Mat mat);
std::vector<std::vector<int>>
OrderPointsClockwise(std::vector<std::vector<int>> pts);
std::vector<std::vector<float>> GetMiniBoxes(cv::RotatedRect box,
float &ssid);
float BoxScoreFast(std::vector<std::vector<float>> box_array, cv::Mat pred);
float PolygonScoreAcc(std::vector<cv::Point> contour, cv::Mat pred);
std::vector<std::vector<std::vector<int>>>
BoxesFromBitmap(const cv::Mat pred, const cv::Mat bitmap,
const float &box_thresh, const float &det_db_unclip_ratio,
const std::string &det_db_score_mode);
std::vector<std::vector<std::vector<int>>>
FilterTagDetRes(std::vector<std::vector<std::vector<int>>> boxes,
float ratio_h, float ratio_w, cv::Mat srcimg);
private:
static bool XsortInt(std::vector<int> a, std::vector<int> b);
static bool XsortFp32(std::vector<float> a, std::vector<float> b);
std::vector<std::vector<float>> Mat2Vector(cv::Mat mat);
inline int _max(int a, int b) { return a >= b ? a : b; }
inline int _min(int a, int b) { return a >= b ? b : a; }
template <class T> inline T clamp(T x, T min, T max) {
if (x > max)
return max;
if (x < min)
return min;
return x;
}
inline float clampf(float x, float min, float max) {
if (x > max)
return max;
if (x < min)
return min;
return x;
}
};
class TablePostProcessor {
public:
void init(std::string label_path, bool merge_no_span_structure = true);
void Run(std::vector<float> &loc_preds, std::vector<float> &structure_probs,
std::vector<float> &rec_scores, ov::Shape &loc_preds_shape,
ov::Shape &structure_probs_shape,
std::vector<std::vector<std::string>> &rec_html_tag_batch,
std::vector<std::vector<std::vector<int>>> &rec_boxes_batch,
std::vector<int> &width_list, std::vector<int> &height_list);
private:
std::vector<std::string> label_list_;
std::string end = "eos";
std::string beg = "sos";
};
class PicodetPostProcessor {
public:
void init(std::string label_path, const double score_threshold = 0.4,
const double nms_threshold = 0.5,
const std::vector<int> &fpn_stride = {8, 16, 32, 64});
void Run(std::vector<StructurePredictResult> &results,
std::vector<std::vector<float>> outs, std::vector<int> ori_shape,
std::vector<int> resize_shape, int eg_max);
std::vector<int> fpn_stride_ = {8, 16, 32, 64};
private:
StructurePredictResult disPred2Bbox(std::vector<float> bbox_pred, int label,
float score, int x, int y, int stride,
std::vector<int> im_shape, int reg_max);
void nms(std::vector<StructurePredictResult> &input_boxes,
float nms_threshold);
std::vector<std::string> label_list_;
double score_threshold_ = 0.4;
double nms_threshold_ = 0.5;
int num_class_ = 5;
};
} // namespace PaddleOCR

View File

@@ -0,0 +1,80 @@
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <iostream>
#include <vector>
#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
namespace PaddleOCR {
class Normalize {
public:
virtual void Run(cv::Mat *im, const std::vector<float> &mean,
const std::vector<float> &scale, const bool is_scale = true);
};
// RGB -> CHW
class Permute {
public:
virtual void Run(const cv::Mat *im, float *data);
};
class PermuteBatch {
public:
virtual void Run(const std::vector<cv::Mat> imgs, float *data);
};
class ResizeImgType0 {
public:
virtual void Run(const cv::Mat &img, cv::Mat &resize_img,
std::string limit_type, int limit_side_len, float &ratio_h,
float &ratio_w);
};
class CrnnResizeImg {
public:
virtual void Run(const cv::Mat &img, cv::Mat &resize_img, float wh_ratio,
const std::vector<int> &rec_image_shape = {3, 32, 320});
};
class ClsResizeImg {
public:
virtual void Run(const cv::Mat &img, cv::Mat &resize_img,
const std::vector<size_t> &rec_image_shape = {3, 48, 192});
};
class TableResizeImg {
public:
virtual void Run(const cv::Mat &img, cv::Mat &resize_img,
const int max_len = 488);
};
class TablePadImg {
public:
virtual void Run(const cv::Mat &img, cv::Mat &resize_img,
const int max_len = 488);
};
class Resize {
public:
virtual void Run(const cv::Mat &img, cv::Mat &resize_img, const int h,
const int w);
};
} // namespace PaddleOCR

View File

@@ -0,0 +1,50 @@
#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
#include <chrono>
#include <iomanip>
#include <iostream>
#include <ostream>
#include <vector>
#include <cstring>
#include <fstream>
#include <numeric>
#include <include/paddleocr_utility.h>
#include <include/preprocess_op.h>
#include <include/postprocess_op.h>
#include <openvino/openvino.hpp>
#include <openvino/core/preprocess/pre_post_process.hpp>
namespace PaddleOCR {
class Layout
{
public:
explicit Layout(std::string model_path, std::string layout_dict_path);
void Run(cv::Mat &src_img, std::vector<StructurePredictResult> &structure_result);
private:
ov::InferRequest infer_request;
std::string model_path;
std::shared_ptr<ov::Model> model;
ov::CompiledModel compiled_model;
cv::Mat src_img;
cv::Mat resize_img;
double e = 1.0 / 255.0;
const int layout_img_h_ = 800;
const int layout_img_w_ = 608;
double layout_nms_threshold = 0.5;
double layout_score_threshold = 0.5;
std::vector<float> mean_ = {0.485f, 0.456f, 0.406f};
std::vector<float> scale_ = {0.229f, 0.224f, 0.225f};
// resize
Resize resize_op_;
// post-process
PicodetPostProcessor post_processor_;
};
}

View File

@@ -0,0 +1,56 @@
#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
#include <chrono>
#include <iomanip>
#include <iostream>
#include <ostream>
#include <vector>
#include <cstring>
#include <fstream>
#include <numeric>
#include <include/paddleocr_utility.h>
#include <include/preprocess_op.h>
#include <include/postprocess_op.h>
#include <openvino/openvino.hpp>
namespace PaddleOCR {
class Table
{
public:
explicit Table(std::string model_path, const std::string table_char_dict_path);
void Run(std::vector<cv::Mat> img_list,
std::vector<std::vector<std::string>> &structure_html_tags,
std::vector<float> &structure_scores,
std::vector<std::vector<std::vector<int>>> &structure_boxes);
private:
ov::InferRequest infer_request;
std::string model_path;
std::shared_ptr<ov::Model> model;
ov::CompiledModel compiled_model;
cv::Mat src_img;
cv::Mat resize_img;
const std::string table_char_dict_path;
int table_batch_num_ = 1;
int table_max_len_ = 488;
std::vector<float> mean_ = {0.485f, 0.456f, 0.406f};
std::vector<float> scale_ = {1 / 0.229f, 1 / 0.224f, 1 / 0.225f};
bool is_scale_ = true;
// pre-process
TableResizeImg resize_op_;
Normalize normalize_op_;
PermuteBatch permute_op_;
TablePadImg pad_op_;
// post-process
TablePostProcessor post_processor_;
};
}