leaf_focus.cli
Command line for leaf focus.
"""Command line for leaf focus."""

from __future__ import annotations

import argparse
import logging
import pathlib
import sys

from beartype import beartype

from leaf_focus import app, utils


def _build_parser() -> argparse.ArgumentParser:
    """Create the argument parser for the ``leaf-focus`` program.

    Returns:
        The parser with the positional and optional arguments registered.
    """
    cli = argparse.ArgumentParser(
        prog="leaf-focus",
        description="Extract structured text from a pdf file.",
    )

    # Informational option: prints the program name and version, then exits.
    cli.add_argument(
        "--version",
        action="version",
        version=f"%(prog)s {utils.get_version()}",
    )

    # Positional paths.
    cli.add_argument(
        "input_pdf",
        type=pathlib.Path,
        help="path to the pdf file to read",
    )
    cli.add_argument(
        "output_dir",
        type=pathlib.Path,
        help="path to the directory to save the extracted text files",
    )

    # Location of the external executables (mandatory).
    cli.add_argument(
        "--exe-dir",
        required=True,
        type=pathlib.Path,
        help="path to the directory containing xpdf executable files",
    )

    # Optional processing switches.
    cli.add_argument(
        "--page-images",
        action="store_true",
        help="save each page of the pdf as a separate image",
    )
    cli.add_argument(
        "--ocr",
        action="store_true",
        help="run optical character recognition on each page of the pdf",
    )

    # Optional page range.
    cli.add_argument(
        "--first",
        type=int,
        default=None,
        help="the first pdf page to process",
    )
    cli.add_argument(
        "--last",
        type=int,
        default=None,
        help="the last pdf page to process",
    )

    cli.add_argument(
        "--log-level",
        default="info",
        choices=["debug", "info", "warning", "error", "critical"],
        help="the log level: debug, info, warning, error, critical",
    )
    return cli


@beartype
def main(args: list[str] | None = None) -> int:
    """Run leaf focus as a command line program.

    Args:
        args: The program arguments. When ``None``, ``sys.argv[1:]`` is used.

    Returns:
        int: Program exit code. ``0`` when the app run returns ``True``,
            ``1`` when it returns anything else or a ``LeafFocusError`` is
            raised, and ``2`` for any other exception.
    """
    cli_args = sys.argv[1:] if args is None else args

    # Start at DEBUG; the root level is set from --log-level further down.
    logging.basicConfig(
        format="%(asctime)s [%(levelname)-8s] %(message)s",
        level=logging.DEBUG,
    )
    log = logging.getLogger(__name__)

    # argparse exits via SystemExit for --help, --version and invalid input.
    options = _build_parser().parse_args(cli_args)

    application = app.App(exe_dir=options.exe_dir)

    try:
        run_args = app.AppArgs(
            input_pdf=options.input_pdf,
            output_dir=options.output_dir,
            first_page=options.first,
            last_page=options.last,
            save_page_images=options.page_images,
            run_ocr=options.ocr,
            log_level=options.log_level,
        )

        root_level = (run_args.log_level or "info").upper()
        logging.getLogger().setLevel(root_level)

        outcome = application.run(run_args)
        return 0 if outcome is True else 1

    except utils.LeafFocusError as error:
        log.exception("Error: %s", error.__class__.__name__)
        return 1

    except Exception as error:
        log.exception("Error: %s", error.__class__.__name__)
        return 2


if __name__ == "__main__":
    sys.exit(main())
def
main(args: list[str] | None = None) -> int:
def main(args: list[str] | None = None) -> int:
    """Run leaf focus as a command line program.

    Args:
        args: The program arguments. When ``None``, ``sys.argv[1:]`` is used.

    Returns:
        int: Program exit code. ``0`` when the app run returns ``True``,
            ``1`` when it returns anything else or a ``LeafFocusError`` is
            raised, and ``2`` for any other exception.
    """
    cli_args = sys.argv[1:] if args is None else args

    # Start at DEBUG; the root level is set from --log-level further down.
    logging.basicConfig(
        format="%(asctime)s [%(levelname)-8s] %(message)s",
        level=logging.DEBUG,
    )
    log = logging.getLogger(__name__)

    cli = argparse.ArgumentParser(
        prog="leaf-focus",
        description="Extract structured text from a pdf file.",
    )
    cli.add_argument(
        "--version",
        action="version",
        version=f"%(prog)s {utils.get_version()}",
    )
    cli.add_argument(
        "input_pdf",
        type=pathlib.Path,
        help="path to the pdf file to read",
    )
    cli.add_argument(
        "output_dir",
        type=pathlib.Path,
        help="path to the directory to save the extracted text files",
    )
    cli.add_argument(
        "--exe-dir",
        required=True,
        type=pathlib.Path,
        help="path to the directory containing xpdf executable files",
    )
    cli.add_argument(
        "--page-images",
        action="store_true",
        help="save each page of the pdf as a separate image",
    )
    cli.add_argument(
        "--ocr",
        action="store_true",
        help="run optical character recognition on each page of the pdf",
    )
    cli.add_argument(
        "--first",
        type=int,
        default=None,
        help="the first pdf page to process",
    )
    cli.add_argument(
        "--last",
        type=int,
        default=None,
        help="the last pdf page to process",
    )
    cli.add_argument(
        "--log-level",
        default="info",
        choices=["debug", "info", "warning", "error", "critical"],
        help="the log level: debug, info, warning, error, critical",
    )

    # argparse exits via SystemExit for --help, --version and invalid input.
    options = cli.parse_args(cli_args)

    application = app.App(exe_dir=options.exe_dir)

    try:
        run_args = app.AppArgs(
            input_pdf=options.input_pdf,
            output_dir=options.output_dir,
            first_page=options.first,
            last_page=options.last,
            save_page_images=options.page_images,
            run_ocr=options.ocr,
            log_level=options.log_level,
        )

        root_level = (run_args.log_level or "info").upper()
        logging.getLogger().setLevel(root_level)

        outcome = application.run(run_args)
        return 0 if outcome is True else 1

    except utils.LeafFocusError as error:
        log.exception("Error: %s", error.__class__.__name__)
        return 1

    except Exception as error:
        log.exception("Error: %s", error.__class__.__name__)
        return 2
Run as a command line program.
Arguments:
- args: The program arguments.
Returns:
int: Program exit code.