
leaf_focus.ocr.keras_ocr

OCR using keras-ocr.

  1"""OCR using keras-ocr."""
  2
  3from __future__ import annotations
  4
  5import logging
  6import os
  7import pathlib
  8
  9import numpy as np
 10
 11from beartype import beartype, typing
 12
 13from leaf_focus import utils
 14from leaf_focus.ocr import model
 15
 16
 17logger = logging.getLogger(__name__)
 18
 19
 20@beartype
 21class OpticalCharacterRecognition:
 22    """OCR implementation using keras-ocr."""
 23
 24    def __init__(self) -> None:
 25        """Create a new OpticalCharacterRecognition."""
 26        self._pipeline = None
 27
 28    def engine_create(self) -> None:
 29        """Create the OCR engine.
 30
 31        Returns:
 32            None
 33        """
 34        if self._pipeline is not None:
 35            return
 36
 37        logger.warning("Creating keras ocr processing engine.")
 38
 39        log_level = logger.getEffectiveLevel()
 40
 41        # set TF_CPP_MIN_LOG_LEVEL before importing tensorflow
 42        # this allows changing the logging printed by tensorflow
 43        tf_log_level_map = {
 44            logging.DEBUG: "0",
 45            logging.INFO: "1",
 46            logging.WARNING: "2",
 47            logging.ERROR: "3",
 48        }
 49        os.environ["TF_CPP_MIN_LOG_LEVEL"] = tf_log_level_map.get(log_level, "1")
 50
 51        import tensorflow as tf
 52
 53        # also set the tf logger level
 54
 55        tf.get_logger().setLevel(log_level)
 56
 57        # check the CPU / GPU in use
 58        gpus = tf.config.list_physical_devices("GPU")
 59        logger.info("GPUs in use: '%s'.", gpus)
 60
 61        import keras_ocr
 62
 63        # TODO: allow specifying path to weights files for detector
 64        # detector_weights_path = ""
 65        # detector = keras_ocr.detection.Detector(weights=None)
 66        # detector.model = keras_ocr.detection.build_keras_model(
 67        #     weights_path=detector_weights_path, backbone_name="vgg"
 68        # )
 69        # detector.model.compile(loss="mse", optimizer="adam")
 70        detector = None
 71
 72        # TODO: allow specifying path to weights files for recogniser
 73        # recognizer_weights_path = ""
 74        # recognizer = keras_ocr.recognition.Recognizer(
 75        #     alphabet=keras_ocr.recognition.DEFAULT_ALPHABET, weights=None
 76        # )
 77        # recognizer.model.load_weights(recognizer_weights_path)
 78        recognizer = None
 79
 80        # see: https://github.com/faustomorales/keras-ocr
 81        # keras-ocr will automatically download pretrained
 82        # weights for the detector and recognizer.
 83        self._pipeline = keras_ocr.pipeline.Pipeline(
 84            detector=detector,
 85            recognizer=recognizer,
 86        )
 87
 88    def engine_run(
 89        self,
 90        image_file: pathlib.Path,
 91    ) -> tuple[list[typing.Any], typing.Any]:
 92        """Run the recognition engine.
 93
 94        Args:
 95            image_file: The path to the image file.
 96
 97        Returns:
 98            typing.Tuple[typing.List, typing.Any]: The list of images
 99                and list of recognition results.
100        """
101        self.engine_create()
102
103        import keras_ocr
104
105        if not self._pipeline:
106            msg = "Keras OCR pipeline has not been initialised yet."
107            logger.error(msg)
108            raise utils.LeafFocusError(msg)
109
110        images = [keras_ocr.tools.read(str(image_file))]
111        return images, self._pipeline.recognize(images)
112
113    def engine_annotate(  # type: ignore [no-untyped-def]
114        self,
115        image: np.ndarray | None,  # type: ignore [type-arg]
116        predictions: list[tuple[typing.Any, typing.Any]],
117        axis,  # noqa: ANN001
118    ) -> None:
119        """Run the annotation engine.
120
121        Args:
122            image: The image data.
123            predictions: The recognised text from the image.
124            axis: The plot axis for drawing annotations.
125
126        Returns:
127            None
128        """
129        import keras_ocr
130
131        keras_ocr.tools.drawAnnotations(image=image, predictions=predictions, ax=axis)
132
133    def recognise_text(
134        self,
135        image_file: pathlib.Path,
136        output_dir: pathlib.Path,
137    ) -> model.KerasOcrResult:
138        """Recognise text in an image file.
139
140        Args:
141            image_file: The path to the image file.
142            output_dir: The directory to write the results.
143
144        Returns:
145            model.KerasOcrResult: The text recognition results.
146        """
147        if not image_file:
148            msg = "Must supply image file."
149            raise utils.LeafFocusError(msg)
150        if not output_dir:
151            msg = "Must supply output directory."
152            raise utils.LeafFocusError(msg)
153        if not image_file.exists():
154            msg = f"Image file does not exist '{image_file}'."
155            raise utils.LeafFocusError(msg) from FileNotFoundError(image_file)
156
157        # check if output files already exist
158        annotations_file = utils.output_root(image_file, "annotations", output_dir)
159        annotations_file = annotations_file.with_suffix(".png")
160
161        predictions_file = utils.output_root(image_file, "predictions", output_dir)
162        predictions_file = predictions_file.with_suffix(".csv")
163
164        result = model.KerasOcrResult(
165            output_dir=output_dir,
166            annotations_file=annotations_file,
167            predictions_file=predictions_file,
168            items=[],
169        )
170
171        if annotations_file.exists() and predictions_file.exists():
172            logger.debug(
173                "Predictions and annotations files already exist for '%s'.",
174                image_file.stem,
175            )
176            all_items = list(model.TextItem.load(predictions_file))
177            result.items = model.TextItem.order_text_lines(all_items)
178            return result
179
180        # read in the image
181        logger.debug(
182            "Creating predictions and annotations files for '%s'.",
183            image_file.stem,
184        )
185
186        # Each list of predictions in prediction_groups is a list of
187        # (word, box) tuples.
188        images, prediction_groups = self.engine_run(image_file)
189
190        # Plot and save the predictions
191        for image, predictions in zip(images, prediction_groups, strict=False):
192            self.save_figure(annotations_file, image, predictions)
193
194            items = self.convert_predictions(predictions)
195            self.save_items(predictions_file, [item for line in items for item in line])
196            result.items = items
197
198        return result
199
200    def save_figure(
201        self,
202        annotation_file: pathlib.Path,
203        image: np.ndarray | None,  # type: ignore [type-arg]
204        predictions: list[tuple[typing.Any, typing.Any]],
205    ) -> None:
206        """Save the annotated image.
207
208        Args:
209            annotation_file: The path to the file containing annotations.
210            image: The image data.
211            predictions: The text recognition results.
212
213        Returns:
214            None
215        """
216        if not annotation_file:
217            msg = "Must supply annotation file."
218            raise utils.LeafFocusError(msg)
219
220        expected_image_shape = 3
221        if image is None or image.size < 1 or len(image.shape) != expected_image_shape:
222            msg_image = image.shape if image is not None else None
223            msg = f"Must supply valid image data, not '{msg_image}'."
224            raise utils.LeafFocusError(msg)
225        if not predictions:
226            predictions = []
227
228        logger.info("Saving OCR image to '%s'.", annotation_file)
229
230        import matplotlib as mpl
231
232        from matplotlib import pyplot as plt
233
234        mpl.use("agg")
235
236        annotation_file.parent.mkdir(exist_ok=True, parents=True)
237
238        fig, axis = plt.subplots(figsize=(20, 20))
239
240        self.engine_annotate(image, predictions, axis)
241
242        fig.savefig(str(annotation_file))
243        plt.close(fig)
244
245    def convert_predictions(
246        self,
247        predictions: list[tuple[typing.Any, typing.Any]],
248    ) -> list[list[model.TextItem]]:
249        """Convert predictions to items.
250
251        Args:
252            predictions: The list of recognised text.
253
254        Returns:
255            typing.List[typing.List[model.TextItem]]: The equivalent text items.
256        """
257        if not predictions:
258            predictions = []
259
260        items = [
261            model.TextItem.from_prediction(prediction) for prediction in predictions
262        ]
263
264        # order_text_lines sets the line number and line order
265        line_items = model.TextItem.order_text_lines(items)
266
267        return line_items
268
269    def save_items(
270        self,
271        items_file: pathlib.Path,
272        items: typing.Iterable[model.TextItem],
273    ) -> None:
274        """Save items to a CSV file.
275
276        Args:
277            items_file: Write the text items to this file.
278            items: The text items to save.
279
280        Returns:
281            None
282        """
283        if not items_file:
284            msg = "Must supply predictions file."
285            raise utils.LeafFocusError(msg)
286        if not items:
287            msg = "Must supply predictions data."
288            raise utils.LeafFocusError(msg)
289
290        logger.info("Saving OCR predictions to '%s'.", items_file)
291
292        items_list = list(items)
293        model.TextItem.save(items_file, items_list)
294
295    def _build_name(self, prefix: str, middle: str, suffix: str) -> str:
296        """Build the file name.
297
298        Args:
299            prefix: The text to add at the start.
300            middle: The text to add in the middle.
301            suffix: The text to add at the end.
302
303        Returns:
304            str: The built name.
305        """
306        prefix = prefix.strip("-")
307        middle = middle.strip("-")
308        suffix = suffix if suffix.startswith(".") else "." + suffix
309        return f"{prefix}-{middle}" + suffix

@beartype
class OpticalCharacterRecognition:

OCR implementation using keras-ocr.

OpticalCharacterRecognition()

Create a new OpticalCharacterRecognition.

def engine_create(self) -> None:

Create the OCR engine.

Returns:

None
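
engine_create is idempotent: the pipeline is built once and reused. The effective level of this module's logger is translated into TF_CPP_MIN_LOG_LEVEL before tensorflow is imported, so configure logging first if you want quieter or noisier tensorflow output. A minimal sketch; the basicConfig call is illustrative, not required:

    import logging

    from leaf_focus.ocr.keras_ocr import OpticalCharacterRecognition

    # Set the desired level before the engine is created so the
    # DEBUG/INFO/WARNING/ERROR -> "0"/"1"/"2"/"3" mapping takes effect.
    logging.basicConfig(level=logging.WARNING)

    ocr = OpticalCharacterRecognition()
    ocr.engine_create()  # downloads pretrained keras-ocr weights on first use
    ocr.engine_create()  # no-op: the pipeline already exists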

def engine_run(self, image_file: pathlib.Path) -> tuple[list[typing.Any], typing.Any]:

Run the recognition engine.

Arguments:
  • image_file: The path to the image file.
Returns:

typing.Tuple[typing.List, typing.Any]: The list of images and list of recognition results.
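
A sketch of calling the recognition engine directly; the image path is illustrative. engine_run creates the pipeline if needed, reads the image with keras_ocr.tools.read, and returns the list of images together with the pipeline's recognition output:

    import pathlib

    from leaf_focus.ocr.keras_ocr import OpticalCharacterRecognition

    ocr = OpticalCharacterRecognition()
    images, prediction_groups = ocr.engine_run(pathlib.Path("page-001.png"))

    # Each group corresponds to one image and is a list of (word, box) tuples.
    for word, box in prediction_groups[0]:
        print(word, box)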

def engine_annotate(self, image: numpy.ndarray | None, predictions: list[tuple[typing.Any, typing.Any]], axis) -> None:

Run the annotation engine.

Arguments:
  • image: The image data.
  • predictions: The recognised text from the image.
  • axis: The plot axis for drawing annotations.
Returns:

None
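
engine_annotate wraps keras_ocr.tools.drawAnnotations. A sketch that draws one image's predictions onto a matplotlib axis; the file names are illustrative:

    import pathlib

    from matplotlib import pyplot as plt

    from leaf_focus.ocr.keras_ocr import OpticalCharacterRecognition

    ocr = OpticalCharacterRecognition()
    images, prediction_groups = ocr.engine_run(pathlib.Path("page-001.png"))

    # Draw the recognised words and their boxes, then save the figure.
    fig, axis = plt.subplots(figsize=(20, 20))
    ocr.engine_annotate(images[0], prediction_groups[0], axis)
    fig.savefig("page-001-annotations.png")
    plt.close(fig)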

def recognise_text(self, image_file: pathlib.Path, output_dir: pathlib.Path) -> leaf_focus.ocr.model.KerasOcrResult:

Recognise text in an image file.

Arguments:
  • image_file: The path to the image file.
  • output_dir: The directory to write the results.
Returns:

model.KerasOcrResult: The text recognition results.
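
recognise_text is the high-level entry point: if both the annotations (.png) and predictions (.csv) outputs already exist they are loaded, otherwise OCR is run and both files are written. A usage sketch with illustrative paths; it assumes model.TextItem exposes the recognised text as a text attribute:

    import pathlib

    from leaf_focus.ocr.keras_ocr import OpticalCharacterRecognition

    ocr = OpticalCharacterRecognition()
    result = ocr.recognise_text(
        image_file=pathlib.Path("pages/page-001.png"),
        output_dir=pathlib.Path("output"),
    )

    print(result.annotations_file)  # annotated image (.png)
    print(result.predictions_file)  # recognised text items (.csv)

    # result.items is a list of lines, each line a list of TextItem objects.
    for line in result.items:
        print(" ".join(item.text for item in line))  # 'text' attribute assumed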

def save_figure(self, annotation_file: pathlib.Path, image: numpy.ndarray | None, predictions: list[tuple[typing.Any, typing.Any]]) -> None:

Save the annotated image.

Arguments:
  • annotation_file: The path to the file containing annotations.
  • image: The image data.
  • predictions: The text recognition results.
Returns:

None
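
save_figure checks that the image is a non-empty three-dimensional array (height, width, channels) before writing the annotated figure, creating parent directories as needed. A sketch reusing the output of engine_run; the paths are illustrative:

    import pathlib

    from leaf_focus.ocr.keras_ocr import OpticalCharacterRecognition

    ocr = OpticalCharacterRecognition()
    images, prediction_groups = ocr.engine_run(pathlib.Path("page-001.png"))

    # Writes the annotated image for the first (and only) image.
    ocr.save_figure(
        annotation_file=pathlib.Path("output/page-001-annotations.png"),
        image=images[0],
        predictions=prediction_groups[0],
    )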

def convert_predictions(self, predictions: list[tuple[typing.Any, typing.Any]]) -> list[list[leaf_focus.ocr.model.TextItem]]:

Convert predictions to items.

Arguments:
  • predictions: The list of recognised text.
Returns:

typing.List[typing.List[model.TextItem]]: The equivalent text items.
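
convert_predictions builds model.TextItem objects from the raw (word, box) predictions and orders them into lines via TextItem.order_text_lines. A sketch continuing from engine_run; the path is illustrative and the text attribute is assumed:

    import pathlib

    from leaf_focus.ocr.keras_ocr import OpticalCharacterRecognition

    ocr = OpticalCharacterRecognition()
    _, prediction_groups = ocr.engine_run(pathlib.Path("page-001.png"))

    # Group the predictions into ordered lines of TextItem objects.
    lines = ocr.convert_predictions(prediction_groups[0])
    for line_number, line in enumerate(lines, start=1):
        print(line_number, [item.text for item in line])  # 'text' attribute assumed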

def save_items(self, items_file: pathlib.Path, items: Iterable[leaf_focus.ocr.model.TextItem]) -> None:

Save items to a CSV file.

Arguments:
  • items_file: Write the text items to this file.
  • items: The text items to save.
Returns:

None
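
save_items expects a flat iterable of TextItem objects, so the nested line lists from convert_predictions need flattening first, which is what recognise_text does internally. A sketch with an illustrative output path:

    import pathlib

    from leaf_focus.ocr.keras_ocr import OpticalCharacterRecognition

    ocr = OpticalCharacterRecognition()
    _, prediction_groups = ocr.engine_run(pathlib.Path("page-001.png"))
    lines = ocr.convert_predictions(prediction_groups[0])

    # Flatten the per-line items before writing the CSV.
    flat_items = [item for line in lines for item in line]
    ocr.save_items(pathlib.Path("output/page-001-predictions.csv"), flat_items)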