Skip to content

Using Tesseract OCR iOS

Adrian edited this page Oct 16, 2017 · 8 revisions

Example Usage

Objective-C

Using Tesseract object

MyViewController.h

#import <TesseractOCR/TesseractOCR.h>
/// View controller used by the synchronous G8Tesseract example.
/// It adopts G8TesseractDelegate so that it can be assigned as the delegate of
/// a G8Tesseract object.
@interface MyViewController : UIViewController <G8TesseractDelegate>
@end

MyViewController.m

/// Configures a G8Tesseract object, runs recognition on a bundled sample image
/// and logs the recognized text.
///
/// The results are read immediately after -recognize returns, so recognition
/// runs inline in this method. NOTE(review): for large images consider moving
/// the work off the main thread (e.g. with G8RecognitionOperation).
- (void)viewDidLoad
{
    [super viewDidLoad];

    // Languages are used for recognition (e.g. eng, ita, etc.). The Tesseract
    // engine searches for the matching .traineddata language file in the
    // tessdata directory. For example, specifying "eng+ita" will search for
    // "eng.traineddata" and "ita.traineddata". The Cube engine searches for
    // "eng.cube.*" files.
    // See https://github.com/tesseract-ocr/tessdata.

    // Create your G8Tesseract object using the initWithLanguage: method.
    G8Tesseract *tesseract = [[G8Tesseract alloc] initWithLanguage:@"eng+ita"];

    // Optional: specify the engine to recognize with.
    // G8OCREngineModeTesseractOnly is the default. It provides more features
    // and is faster than the Cube engine. See G8Constants.h for more information.
    //tesseract.engineMode = G8OCREngineModeTesseractOnly;

    // Set up the delegate to receive Tesseract's callbacks.
    // self should conform to G8TesseractDelegate and implement
    // "- (BOOL)shouldCancelImageRecognitionForTesseract:(G8Tesseract *)tesseract"
    // to decide whether or not to interrupt Tesseract before it finishes a
    // recognition.
    tesseract.delegate = self;

    // Optional: limit the character set Tesseract should try to recognize from.
    tesseract.charWhitelist = @"0123456789";

    // charWhitelist is a convenience wrapper for the common Tesseract variable
    // kG8ParamTesseditCharWhitelist; the equivalent call is:
    // [tesseract setVariableValue:@"0123456789" forKey:kG8ParamTesseditCharWhitelist];
    // See G8TesseractParameters.h for a complete list of Tesseract variables.

    // Optional: list characters Tesseract should NOT try to recognize.
    //tesseract.charBlacklist = @"OoZzBbSs";

    // Specify the image Tesseract should recognize on.
    tesseract.image = [[UIImage imageNamed:@"image_sample.jpg"] g8_blackAndWhite];

    // Optional: limit the area of the image Tesseract should recognize on to a rectangle.
    tesseract.rect = CGRectMake(20, 20, 100, 100);

    // Optional: limit the recognition time to a few seconds.
    tesseract.maximumRecognitionTime = 2.0;

    // Start the recognition. The BOOL result is checked so that a failed or
    // interrupted run is visible in the log; whatever was recognized is still
    // retrieved below, as before.
    if (![tesseract recognize]) {
        NSLog(@"Tesseract recognition did not complete successfully");
    }

    // Retrieve the recognized text.
    NSLog(@"%@", [tesseract recognizedText]);

    // You can retrieve more information about the recognized text with these
    // methods (the locals below are illustrative and otherwise unused):
    NSArray *characterBoxes = [tesseract recognizedBlocksByIteratorLevel:G8PageIteratorLevelSymbol];
    NSArray *paragraphs = [tesseract recognizedBlocksByIteratorLevel:G8PageIteratorLevelParagraph];
    NSArray *characterChoices = tesseract.characterChoices;
    UIImage *imageWithBlocks = [tesseract imageWithBlocks:characterBoxes drawText:YES thresholded:NO];
}

/// Logs the current value of the given Tesseract object's `progress` property.
/// Presumably invoked by G8Tesseract on its delegate while recognition runs.
///
/// @param tesseract The G8Tesseract object whose progress is reported.
- (void)progressImageRecognitionForTesseract:(G8Tesseract *)tesseract {
    // Cast explicitly so the value matches the %lu format specifier.
    unsigned long currentProgress = (unsigned long)tesseract.progress;
    NSLog(@"progress: %lu", currentProgress);
}

/// Tells the given Tesseract object whether recognition should be interrupted.
/// This example never cancels.
///
/// @param tesseract The G8Tesseract object asking whether to cancel.
/// @return NO, so recognition is allowed to run to completion.
- (BOOL)shouldCancelImageRecognitionForTesseract:(G8Tesseract *)tesseract {
    // Return YES instead if you need to interrupt Tesseract before it finishes.
    BOOL cancelRecognition = NO;
    return cancelRecognition;
}

Using NSOperationQueue

MyViewController.h

#import <TesseractOCR/TesseractOCR.h>
/// View controller used by the NSOperationQueue example.
/// No delegate protocol is adopted in this declaration.
@interface MyViewController : UIViewController
@end

MyViewController.m

/// Creates a G8RecognitionOperation for a bundled sample image and enqueues it
/// on a new NSOperationQueue. The recognized text is logged from the
/// operation's completion block.
- (void)viewDidLoad
{
    // Lifecycle overrides must forward to super before doing their own work.
    [super viewDidLoad];

    // Create the recognition operation.
    G8RecognitionOperation *operation = [[G8RecognitionOperation alloc] initWithLanguage:@"eng+ita"];

    // Configure the inner G8Tesseract object in the same way as a standalone
    // G8Tesseract object.
    operation.tesseract.charWhitelist = @"01234567890";
    operation.tesseract.image = [[UIImage imageNamed:@"image_sample.jpg"] g8_blackAndWhite];

    // Set up the recognitionCompleteBlock to receive the Tesseract object
    // after text recognition. It will hold the recognized text.
    // The block does not capture self, so no weak reference is needed here.
    operation.recognitionCompleteBlock = ^(G8Tesseract *recognizedTesseract) {
        // Retrieve the recognized text upon completion.
        NSLog(@"%@", [recognizedTesseract recognizedText]);
    };

    // Add the operation to a queue to start it.
    // NOTE(review): the queue is a local variable; keep it in a property if
    // you need to cancel or inspect pending operations later.
    NSOperationQueue *queue = [[NSOperationQueue alloc] init];
    [queue addOperation:operation];
}

Swift

Make sure that you have used an Objective-C bridging header to include the library. Instructions on configuring a bridging header file can be found in the Apple Developer Library.

ViewController.swift

import UIKit
import TesseractOCR

/// Minimal Swift example: creates a G8Tesseract object, runs recognition on a
/// bundled sample image in `viewDidLoad`, and prints the recognized text.
class ViewController: UIViewController, G8TesseractDelegate {
    override func viewDidLoad() {
        super.viewDidLoad()

        // The initializer is imported from Objective-C and its result is
        // optional on the Swift side, so bind it instead of assuming success.
        // `let` is sufficient because the reference is never reassigned.
        guard let tesseract = G8Tesseract(language: "eng+ita") else {
            print("Unable to create G8Tesseract for language eng+ita")
            return
        }
        //tesseract.language = "eng+ita"
        tesseract.delegate = self
        tesseract.charWhitelist = "01234567890"
        tesseract.image = UIImage(named: "image_sample.jpg")

        // Surface a failed or interrupted run; any text that was recognized
        // is still printed below.
        if !tesseract.recognize() {
            print("Tesseract recognition did not complete successfully")
        }

        // Unwrap before printing so the output is the plain text rather than
        // an optional description.
        if let text = tesseract.recognizedText {
            print(text)
        }
    }

    override func didReceiveMemoryWarning() {
        super.didReceiveMemoryWarning()
    }

    /// Tells Tesseract whether recognition should be interrupted.
    /// This example never cancels.
    ///
    /// NOTE(review): with Swift 3 and later the Objective-C selector
    /// `shouldCancelImageRecognitionForTesseract:` is expected to be imported
    /// as `shouldCancelImageRecognition(for:)`; confirm the spelling against
    /// the Swift version in use, otherwise this method may never be called.
    func shouldCancelImageRecognitionForTesseract(tesseract: G8Tesseract!) -> Bool {
        return false // return true if you need to interrupt tesseract before it finishes
    }
}