Edit on GitHub

leaf_focus.cli

Command line for leaf focus.

  1"""Command line for leaf focus."""
  2
  3from __future__ import annotations
  4
  5import argparse
  6import logging
  7import pathlib
  8import sys
  9
 10from beartype import beartype
 11
 12from leaf_focus import app, utils
 13
 14
 15@beartype
 16def main(args: list[str] | None = None) -> int:
 17    """Run as a command line program.
 18
 19    Args:
 20        args: The program arguments.
 21
 22    Returns:
 23        int: Program exit code.
 24    """
 25    if args is None:
 26        args = sys.argv[1:]
 27
 28    logging.basicConfig(
 29        format="%(asctime)s [%(levelname)-8s] %(message)s",
 30        level=logging.DEBUG,
 31    )
 32    logger = logging.getLogger(__name__)
 33
 34    parser = argparse.ArgumentParser(
 35        prog="leaf-focus",
 36        description="Extract structured text from a pdf file.",
 37    )
 38    parser.add_argument(
 39        "--version",
 40        action="version",
 41        version=f"%(prog)s {utils.get_version()}",
 42    )
 43    parser.add_argument(
 44        "input_pdf",
 45        type=pathlib.Path,
 46        help="path to the pdf file to read",
 47    )
 48    parser.add_argument(
 49        "output_dir",
 50        type=pathlib.Path,
 51        help="path to the directory to save the extracted text files",
 52    )
 53    parser.add_argument(
 54        "--exe-dir",
 55        required=True,
 56        type=pathlib.Path,
 57        help="path to the directory containing xpdf executable files",
 58    )
 59    parser.add_argument(
 60        "--page-images",
 61        action="store_true",
 62        help="save each page of the pdf as a separate image",
 63    )
 64    parser.add_argument(
 65        "--ocr",
 66        action="store_true",
 67        help="run optical character recognition on each page of the pdf",
 68    )
 69    parser.add_argument(
 70        "--first",
 71        type=int,
 72        default=None,
 73        help="the first pdf page to process",
 74    )
 75    parser.add_argument(
 76        "--last",
 77        type=int,
 78        default=None,
 79        help="the last pdf page to process",
 80    )
 81    parser.add_argument(
 82        "--log-level",
 83        default="info",
 84        choices=["debug", "info", "warning", "error", "critical"],
 85        help="the log level: debug, info, warning, error, critical",
 86    )
 87
 88    parsed_args = parser.parse_args(args)
 89
 90    app_inst = app.App(exe_dir=parsed_args.exe_dir)
 91
 92    try:
 93        app_args = app.AppArgs(
 94            input_pdf=parsed_args.input_pdf,
 95            output_dir=parsed_args.output_dir,
 96            first_page=parsed_args.first,
 97            last_page=parsed_args.last,
 98            save_page_images=parsed_args.page_images,
 99            run_ocr=parsed_args.ocr,
100            log_level=parsed_args.log_level,
101        )
102
103        logging.getLogger().setLevel((app_args.log_level or "info").upper())
104
105        result = app_inst.run(app_args)
106        if result is True:
107            return 0
108        return 1
109
110    except utils.LeafFocusError as error:
111        logger.exception("Error: %s", error.__class__.__name__)
112        return 1
113
114    except Exception as error:
115        logger.exception("Error: %s", error.__class__.__name__)
116        return 2
117
118
if __name__ == "__main__":
    # Use the integer returned by main() as the process exit status.
    raise SystemExit(main())
def main(args: list[str] | None = None) -> int:
 17def main(args: list[str] | None = None) -> int:
 18    """Run as a command line program.
 19
 20    Args:
 21        args: The program arguments.
 22
 23    Returns:
 24        int: Program exit code.
 25    """
 26    if args is None:
 27        args = sys.argv[1:]
 28
 29    logging.basicConfig(
 30        format="%(asctime)s [%(levelname)-8s] %(message)s",
 31        level=logging.DEBUG,
 32    )
 33    logger = logging.getLogger(__name__)
 34
 35    parser = argparse.ArgumentParser(
 36        prog="leaf-focus",
 37        description="Extract structured text from a pdf file.",
 38    )
 39    parser.add_argument(
 40        "--version",
 41        action="version",
 42        version=f"%(prog)s {utils.get_version()}",
 43    )
 44    parser.add_argument(
 45        "input_pdf",
 46        type=pathlib.Path,
 47        help="path to the pdf file to read",
 48    )
 49    parser.add_argument(
 50        "output_dir",
 51        type=pathlib.Path,
 52        help="path to the directory to save the extracted text files",
 53    )
 54    parser.add_argument(
 55        "--exe-dir",
 56        required=True,
 57        type=pathlib.Path,
 58        help="path to the directory containing xpdf executable files",
 59    )
 60    parser.add_argument(
 61        "--page-images",
 62        action="store_true",
 63        help="save each page of the pdf as a separate image",
 64    )
 65    parser.add_argument(
 66        "--ocr",
 67        action="store_true",
 68        help="run optical character recognition on each page of the pdf",
 69    )
 70    parser.add_argument(
 71        "--first",
 72        type=int,
 73        default=None,
 74        help="the first pdf page to process",
 75    )
 76    parser.add_argument(
 77        "--last",
 78        type=int,
 79        default=None,
 80        help="the last pdf page to process",
 81    )
 82    parser.add_argument(
 83        "--log-level",
 84        default="info",
 85        choices=["debug", "info", "warning", "error", "critical"],
 86        help="the log level: debug, info, warning, error, critical",
 87    )
 88
 89    parsed_args = parser.parse_args(args)
 90
 91    app_inst = app.App(exe_dir=parsed_args.exe_dir)
 92
 93    try:
 94        app_args = app.AppArgs(
 95            input_pdf=parsed_args.input_pdf,
 96            output_dir=parsed_args.output_dir,
 97            first_page=parsed_args.first,
 98            last_page=parsed_args.last,
 99            save_page_images=parsed_args.page_images,
100            run_ocr=parsed_args.ocr,
101            log_level=parsed_args.log_level,
102        )
103
104        logging.getLogger().setLevel((app_args.log_level or "info").upper())
105
106        result = app_inst.run(app_args)
107        if result is True:
108            return 0
109        return 1
110
111    except utils.LeafFocusError as error:
112        logger.exception("Error: %s", error.__class__.__name__)
113        return 1
114
115    except Exception as error:
116        logger.exception("Error: %s", error.__class__.__name__)
117        return 2

Run as a command line program.

Arguments:
  • args: The program arguments.

Returns:
  • int: Program exit code.