20 #define HOCRDOCUMENT 1
23 #include "HOCRTextBox.h"
24 #include "resolution.h"
75 HOCRDocument(const QImage &image, QStringList languages = QStringList())
{
    // Construction from an image is delegated entirely to read(), which
    // generates the document by running the tesseract OCR engine.
    read(image, languages);
}
88 bool hasError() const
{
    // Error status: true exactly when the stored error message is non-empty.
    const bool errorRecorded = !_error.isEmpty();
    return errorRecorded;
}
96 QString
error()
const {
return _error; };
111 QSet<QString> warnings() const
{
    // Warning messages: returns the stored set of warnings by value.
    return this->_warnings;
}
119 QSet<QString> system() const
{
    // System(s) that generated this file: returns the stored set by value.
    return this->_OCRSystem;
}
136 QList<HOCRTextBox> pages() const
{
    // Pages in the document: returns the stored page list by value.
    return this->_pages;
}
142 bool isEmpty() const
{
    // The document is empty exactly when its page list holds no pages.
    const bool noPages = _pages.isEmpty();
    return noPages;
}
163 if (_pages.size() > 0)
164 return _pages.takeFirst();
191 void read(
const QString& fileName); // Reads an HOCR document from the file named by fileName.
210 void read( // Generates an HOCR document by running the tesseract OCR engine.
const QImage &image, // image handed to the OCR engine
const QStringList& languages=QStringList()); // languages passed to the engine; defaults to an empty list
248 QString // NOTE(review): meaning of the returned string is not visible here - presumably an error message; confirm against the implementation
toPDF( // Export to PDF.
const QString& fileName, // NOTE(review): presumably the path of the PDF file to write - confirm
resolution _resolution, // resolution value; see the resolution class, which stores a resolution and converts between units
const QString& title=QString(), // defaults to an empty string
const QPageSize& overridePageSize=QPageSize(), QFont *overrideFont=0) // both overrides are optional: default-constructed page size and null font pointer
const;
264 QList<QImage> // Export to images.
toImages(QFont *overrideFont=0, QImage::Format format=QImage::Format_Grayscale8) // font override defaults to null; image format defaults to 8-bit grayscale
const;
308 QPageSize findPageSize( // NOTE(review): presumably determines the page size used for one page - confirm against the implementation
int pageNumber, // NOTE(review): presumably an index into the page list; base (0 or 1) not visible here - confirm
resolution _resolution,
const QPageSize &overridePageSize) // NOTE(review): presumably takes precedence over the computed size when set - confirm
const;
315 QSet<QString> _OCRSystem; // returned by system(): system(s) that generated this file
319 QSet<QString> _OCRCapabilities; // NOTE(review): presumably the set returned by capabilities() - confirm
322 QList<HOCRTextBox> _pages; // pages of the document; returned by pages(), tested by isEmpty()
325 QSet<QString> _warnings; // returned by warnings(): warning messages
Reads and interprets HOCR files, the standard output file format for Optical Character Recognition sy...
HOCRDocument(QString fileName)
Constructs an HOCR document from a file.
HOCRDocument(QIODevice *device)
Constructs an HOCR document from a QIODevice.
QString toText() const
Export this document as text.
static bool areLanguagesSupportedByTesseract(const QStringList &lingos)
Check if languages are supported by tesseract.
QList< QImage > toImages(QFont *overrideFont=0, QImage::Format format=QImage::Format_Grayscale8) const
Export to images.
void read(const QImage &image, const QStringList &languages=QStringList())
Generates an HOCR document by running the tesseract OCR engine.
void read(QIODevice *device)
Reads an HOCR document from a QIODevice.
bool isEmpty() const
Returns true if the document contains no pages.
QString error() const
Error message.
HOCRDocument()
Constructs an empty HOCR document.
HOCRTextBox takeFirstPage()
Removes the first page of the document and returns it.
void read(const QString &fileName)
Reads an HOCR document from a file.
QFont suggestFont() const
Suggest font.
QSet< QString > system() const
System(s) that generated this file.
QSet< QString > warnings() const
Warning messages.
bool hasText() const
Check if the document contains text.
HOCRDocument(const QImage &image, QStringList languages=QStringList())
Constructs an HOCR document by running the tesseract OCR engine.
void append(const HOCRDocument &other)
Appends other HOCRDocument.
static QStringList tesseractLanguages()
List of languages supported by tesseract.
bool hasWarnings() const
Warning status.
QList< HOCRTextBox > pages() const
Pages in the document.
bool hasError() const
Error status.
QString toPDF(const QString &fileName, resolution _resolution, const QString &title=QString(), const QPageSize &overridePageSize=QPageSize(), QFont *overrideFont=0) const
Export to PDF.
void clear()
Resets the document.
QSet< QString > capabilities() const
OCR capabilities.
Text box, as defined in an HOCR file.
The resolution class stores a resolution and converts between units.