leaf_focus.utils
Small utility functions.
1"""Small utility functions.""" 2 3from __future__ import annotations 4 5import dataclasses 6import json 7import logging 8import pathlib 9import platform 10import re 11import unicodedata 12 13from datetime import date, datetime, time, timezone 14from enum import Enum 15from xml.etree.ElementTree import Element 16 17from beartype import beartype, typing 18from importlib_metadata import PackageNotFoundError, distribution 19from importlib_resources import as_file, files 20 21 22logger = logging.getLogger(__name__) 23 24 25@beartype 26def get_name_dash() -> str: 27 """Get the package name with word separated by dashes.""" 28 return "leaf-focus" 29 30 31@beartype 32def get_name_under() -> str: 33 """Get the package name with word separated by underscores.""" 34 return "leaf_focus" 35 36 37@beartype 38def get_version() -> str | None: 39 """Get the package version.""" 40 try: 41 dist = distribution(get_name_dash()) 42 except PackageNotFoundError: 43 pass 44 45 else: 46 return str(dist.version) 47 48 try: 49 with as_file(files(get_name_under()).joinpath("cli.py")) as file_path: 50 version_text = (file_path.parent.parent.parent / "VERSION").read_text() 51 return str(version_text.strip()) 52 except FileNotFoundError: 53 pass 54 55 return None 56 57 58@beartype 59def parse_date(value: str) -> datetime | None: 60 """Parse a date from a string.""" 61 formats = [ 62 # e.g. 'Thu Aug 13 11:09:00 2020' 63 "%a %b %d %H:%M:%S %Y", 64 # e.g. 
'2011-11-04T00:05:23Z' 65 "%Y-%m-%dT%H:%M:%SZ", 66 ] 67 for fmt in formats: 68 try: 69 return datetime.strptime(value, fmt).replace(tzinfo=timezone.utc) 70 except ValueError: # noqa: PERF203 71 logger.debug("Value '%s' did not match date format '%s'.", value, fmt) 72 return None 73 74 75@beartype 76def validate( 77 name: str, value: str | int | None, expected: typing.Iterable[str | int | None] 78) -> None: 79 """Validate that a value is one of the expected values.""" 80 if value is not None and value not in expected: 81 opts = ", ".join(sorted([str(i) for i in expected])) 82 msg = f"Invalid {name} '{value}'. Expected one of '{opts}'." 83 raise LeafFocusError(msg) 84 85 86@beartype 87class ValidatePathMethod(Enum): 88 """Options for how to validate a path.""" 89 90 NO_OPINION = 0 91 MUST_EXIST = 1 92 93 94@beartype 95def validate_path( 96 name: str, 97 value: pathlib.Path, 98 must_exist: ValidatePathMethod = ValidatePathMethod.NO_OPINION, 99) -> pathlib.Path: 100 """Validate a path.""" 101 if not value: 102 msg = f"Must provide path {name}." 103 raise LeafFocusError(msg) 104 105 try: 106 if must_exist == ValidatePathMethod.MUST_EXIST: 107 abs_path = value.resolve(strict=True) 108 else: 109 abs_path = value.absolute() 110 111 except Exception as error: 112 msg = f"Invalid path '{value}'." 113 raise LeafFocusError(msg) from error 114 115 else: 116 return abs_path 117 118 119@beartype 120def validate_pages(first_page: int | None, last_page: int | None) -> None: 121 """Validate the page range. 122 123 Args: 124 first_page: The first page. 125 last_page: The last page. 126 127 Returns: 128 None 129 """ 130 if first_page is None or last_page is None: 131 return 132 if first_page > last_page: 133 msg = ( 134 f"First page ({first_page}) must be less than or equal " 135 f"to last page ({last_page})." 
136 ) 137 raise LeafFocusError(msg) 138 139 140@beartype 141def select_exe(value: pathlib.Path) -> pathlib.Path: 142 """Select the executable path based on the platform.""" 143 if platform.system() == "Windows": 144 value = value.with_suffix(".exe") 145 146 if not value.exists(): 147 msg = f"Exe file not found '{value}'." 148 raise LeafFocusError(msg) from FileNotFoundError(value) 149 150 return value 151 152 153@beartype 154def output_root( 155 input_file: pathlib.Path, 156 output_type: str, 157 output_path: pathlib.Path, 158 additional: typing.Collection[str] | None = None, 159) -> pathlib.Path: 160 """Build the path to the output.""" 161 name_parts = [input_file.stem, output_type] 162 name_parts.extend(additional or []) 163 name_parts = [str(i) for i in name_parts if i is not None] 164 name_parts = [str_norm(i.strip("-")) for i in name_parts if i and i.strip()] 165 166 name = "-".join(name_parts) 167 168 output = output_path / name 169 170 return output 171 172 173_slug_re_1 = re.compile(r"[^\w\s-]") 174_slug_re_2 = re.compile(r"[-\s]+") 175 176 177@beartype 178def str_norm(value: str) -> str: 179 """Normalise a string into the 'slug' format.""" 180 separator = "-" 181 encoding = "utf-8" 182 183 norm = unicodedata.normalize("NFKD", value) 184 enc = norm.encode(encoding, "ignore") 185 de_enc = enc.decode(encoding) 186 alpha_num_only = _slug_re_1.sub("", de_enc) 187 alpha_num_tidy = alpha_num_only.strip().lower() 188 result = _slug_re_2.sub(separator, alpha_num_tidy) 189 return result 190 191 192class IsDataclass(typing.Protocol): 193 """A protocol to allow typing for dataclasses.""" 194 195 __dataclass_fields__: typing.ClassVar[dict[str, typing.Any]] 196 197 198@beartype 199class CustomJsonEncoder(json.JSONEncoder): 200 """A custom json encoder.""" 201 202 def default(self, o: IsDataclass | datetime | date | time) -> str | typing.Any: 203 """Conversion used by default.""" 204 if isinstance(o, datetime | date | time): 205 return o.isoformat() 206 207 return 
super().default(o) 208 209 210@beartype 211@dataclasses.dataclass 212class XmlElement: 213 """A simple xml element. 214 215 <tag attrib>text<child/>...</tag>tail 216 """ 217 218 attrib: typing.Collection[tuple[str, str, str]] 219 tag: str 220 name_space: str 221 text: str 222 tail: str 223 children: typing.Collection[XmlElement] 224 225 def to_dict(self) -> dict[str, typing.Any]: 226 """Convert xml element to a dict.""" 227 result: dict[str, typing.Any] = {"name": self.tag.strip()} 228 229 value = ((self.text or "").strip() + " " + (self.tail or "").strip()).strip() 230 if value: 231 result["value"] = value 232 233 attributes = {k.strip(): (v or "").strip() for n, k, v in self.attrib} 234 if attributes: 235 result["attributes"] = attributes 236 237 children = [i.to_dict() for i in self.children] 238 if children: 239 result["children"] = children 240 241 return result 242 243 def __str__(self) -> str: 244 """Convert to a string.""" 245 tag1 = (self.tag or "").strip() 246 tag2 = f"</{tag1}>" 247 text = (self.text or "").strip() 248 tail = (self.tail or "").strip() 249 250 count = len(self.children) 251 if count == 0: 252 children = "" 253 elif count == 1: 254 children = "(1 child)" 255 else: 256 children = f"({count} children)" 257 258 if text and children: 259 children = " " + children 260 261 if not text and not children: 262 tag2 = "" 263 264 count_attrib = len(self.attrib) 265 if count_attrib == 0: 266 attrib = "" 267 elif count_attrib == 1: 268 attrib = " (1 attribute)" 269 else: 270 attrib = f" ({count} attributes)" 271 272 return f"<{tag1}{attrib}>{text}{children}{tag2}{tail}" 273 274 275@beartype 276def xml_to_element(element: Element) -> XmlElement: 277 """Convert xml into nested dicts.""" 278 attrib = element.attrib or {} 279 tag = element.tag 280 text = element.text 281 tail = element.tail 282 283 children = [xml_to_element(child) for child in element] 284 285 tag_ns, tag_name = xml_tag_ns(tag) 286 287 attrib_ns = [] 288 for key, value in attrib.items(): 
289 extracted_ns, extracted_tag = xml_tag_ns(key) 290 attrib_ns.append((extracted_ns, extracted_tag, value)) 291 292 item = XmlElement( 293 attrib=attrib_ns, 294 tag=tag_name, 295 name_space=tag_ns, 296 text=text or "", 297 tail=tail or "", 298 children=children, 299 ) 300 301 return item 302 303 304@beartype 305def xml_tag_ns(value: str) -> tuple[str, str]: 306 """Get the XML namespace and name. 307 308 Args: 309 value: The combined namespace and name 310 311 Returns: 312 The separate namespace and name 313 """ 314 if "}" in value: 315 name_space, name = value.rsplit("}", maxsplit=1) 316 name_space = name_space.strip("{}") 317 else: 318 name_space = "" 319 name = value 320 321 return name_space, name 322 323 324@beartype 325class LeafFocusError(Exception): 326 """A custom error for leaf focus."""
logger =
<Logger leaf_focus.utils (WARNING)>
def
get_name_dash() -> str:
def get_name_dash() -> str:
    """Get the package name with words separated by dashes."""
    return "leaf-focus"
Get the package name with words separated by dashes.
def
get_name_under() -> str:
def get_name_under() -> str:
    """Get the package name with words separated by underscores."""
    return "leaf_focus"
Get the package name with words separated by underscores.
def
get_version() -> str | None:
def get_version() -> str | None:
    """Get the package version.

    The installed distribution metadata is consulted first. If the
    distribution is not found, a ``VERSION`` file in the third ancestor
    directory of the package's ``cli.py`` is read instead.

    Returns:
        The version string, or ``None`` if neither source is available.
    """
    try:
        dist = distribution(get_name_dash())
    except PackageNotFoundError:
        # Not installed as a distribution: fall back to the VERSION file.
        pass

    else:
        return str(dist.version)

    try:
        with as_file(files(get_name_under()).joinpath("cli.py")) as file_path:
            version_file = file_path.parent.parent.parent / "VERSION"
            # Explicit encoding so the result does not depend on the locale.
            return version_file.read_text(encoding="utf-8").strip()
    except FileNotFoundError:
        # No VERSION file either: the version is simply unknown.
        pass

    return None
Get the package version.
def
parse_date(value: str) -> datetime.datetime | None:
def parse_date(value: str) -> datetime | None:
    """Parse a date from a string.

    Each known format is tried in order and the first match wins.

    Args:
        value: The text to parse.

    Returns:
        A datetime with its tzinfo set to UTC, or ``None`` if no format matches.
    """
    formats = [
        # e.g. 'Thu Aug 13 11:09:00 2020'
        "%a %b %d %H:%M:%S %Y",
        # e.g. '2011-11-04T00:05:23Z'
        "%Y-%m-%dT%H:%M:%SZ",
    ]
    for fmt in formats:
        try:
            return datetime.strptime(value, fmt).replace(tzinfo=timezone.utc)
        except ValueError:  # noqa: PERF203
            logger.debug("Value '%s' did not match date format '%s'.", value, fmt)
    return None
Parse a date from a string.
def
validate( name: str, value: str | int | None, expected: Iterable[str | int | None]) -> None:
def validate(
    name: str, value: str | int | None, expected: typing.Iterable[str | int | None]
) -> None:
    """Validate that a value is one of the expected values.

    A value of ``None`` is always accepted.

    Args:
        name: Label for the value, used in the error message.
        value: The value to check.
        expected: The allowed values.

    Raises:
        LeafFocusError: If the value is not ``None`` and is not in ``expected``.
    """
    if value is None:
        return

    # A one-shot iterator would be drained by the membership test, leaving
    # nothing to list in the error message, so materialise it first.
    if not isinstance(expected, typing.Collection):
        expected = list(expected)

    if value not in expected:
        opts = ", ".join(sorted(str(i) for i in expected))
        msg = f"Invalid {name} '{value}'. Expected one of '{opts}'."
        raise LeafFocusError(msg)
Validate that a value is one of the expected values.
@beartype
class
@beartype
class ValidatePathMethod(Enum):
    """Options for how to validate a path."""

    NO_OPINION = 0
    MUST_EXIST = 1
Options for how to validate a path.
NO_OPINION =
<ValidatePathMethod.NO_OPINION: 0>
MUST_EXIST =
<ValidatePathMethod.MUST_EXIST: 1>
def
validate_path( name: str, value: pathlib.Path, must_exist: ValidatePathMethod = <ValidatePathMethod.NO_OPINION: 0>) -> pathlib.Path:
def validate_path(
    name: str,
    value: pathlib.Path,
    must_exist: ValidatePathMethod = ValidatePathMethod.NO_OPINION,
) -> pathlib.Path:
    """Validate a path and return it in absolute form.

    Args:
        name: Label for the path, used in the error message.
        value: The path to validate.
        must_exist: With ``MUST_EXIST`` the path is resolved strictly;
            otherwise it is only made absolute.

    Returns:
        The strictly resolved path (``MUST_EXIST``) or the absolute path.

    Raises:
        LeafFocusError: If the path is falsy or cannot be resolved.
    """
    if not value:
        msg = f"Must provide path {name}."
        raise LeafFocusError(msg)

    try:
        if must_exist == ValidatePathMethod.MUST_EXIST:
            abs_path = value.resolve(strict=True)
        else:
            abs_path = value.absolute()

    except Exception as error:
        msg = f"Invalid path '{value}'."
        raise LeafFocusError(msg) from error

    else:
        return abs_path
Validate a path.
def
validate_pages(first_page: int | None, last_page: int | None) -> None:
def validate_pages(first_page: int | None, last_page: int | None) -> None:
    """Validate the page range.

    Nothing is checked unless both pages are given.

    Args:
        first_page: The first page.
        last_page: The last page.

    Raises:
        LeafFocusError: If the first page is greater than the last page.
    """
    if first_page is None or last_page is None:
        return
    if first_page > last_page:
        msg = (
            f"First page ({first_page}) must be less than or equal "
            f"to last page ({last_page})."
        )
        raise LeafFocusError(msg)
Validate the page range.
Arguments:
- first_page: The first page.
- last_page: The last page.
Returns:
None
def
select_exe(value: pathlib.Path) -> pathlib.Path:
def select_exe(value: pathlib.Path) -> pathlib.Path:
    """Select the executable path based on the platform.

    Args:
        value: Path to the executable.

    Returns:
        The path, with its suffix set to ``.exe`` on Windows.

    Raises:
        LeafFocusError: If the selected path does not exist.
    """
    if platform.system() == "Windows":
        value = value.with_suffix(".exe")

    if not value.exists():
        msg = f"Exe file not found '{value}'."
        raise LeafFocusError(msg) from FileNotFoundError(value)

    return value
Select the executable path based on the platform.
def
output_root( input_file: pathlib.Path, output_type: str, output_path: pathlib.Path, additional: Collection[str] | None = None) -> pathlib.Path:
def output_root(
    input_file: pathlib.Path,
    output_type: str,
    output_path: pathlib.Path,
    additional: typing.Collection[str] | None = None,
) -> pathlib.Path:
    """Build the path to the output.

    The name is the input file stem, the output type and any additional
    parts, each normalised with ``str_norm`` and joined with dashes.

    Args:
        input_file: The input file; only its stem is used.
        output_type: A label for the kind of output.
        output_path: The directory the name is appended to.
        additional: Optional extra name parts.

    Returns:
        ``output_path`` joined with the built name.
    """
    name_parts = [input_file.stem, output_type]
    name_parts.extend(additional or [])
    name_parts = [str(i) for i in name_parts if i is not None]
    # Blank parts are dropped; surrounding dashes are removed before slugging.
    name_parts = [str_norm(i.strip("-")) for i in name_parts if i and i.strip()]

    name = "-".join(name_parts)

    return output_path / name
Build the path to the output.
def
str_norm(value: str) -> str:
def str_norm(value: str) -> str:
    """Normalise a string into the 'slug' format.

    Args:
        value: The text to normalise.

    Returns:
        The lower-cased text after the two module-level slug patterns have
        been applied: matches of the first are removed, matches of the
        second are replaced by a single dash.
    """
    separator = "-"
    encoding = "utf-8"

    norm = unicodedata.normalize("NFKD", value)
    # The encode/decode round trip drops anything that cannot be encoded.
    enc = norm.encode(encoding, "ignore")
    de_enc = enc.decode(encoding)
    alpha_num_only = _slug_re_1.sub("", de_enc)
    alpha_num_tidy = alpha_num_only.strip().lower()
    return _slug_re_2.sub(separator, alpha_num_tidy)
Normalise a string into the 'slug' format.
class
IsDataclass(beartype.typing.Protocol):
class IsDataclass(typing.Protocol):
    """A protocol to allow typing for dataclasses."""

    __dataclass_fields__: typing.ClassVar[dict[str, typing.Any]]
A protocol to allow typing for dataclasses.
@beartype
class
@beartype
class CustomJsonEncoder(json.JSONEncoder):
    """A custom json encoder."""

    def default(self, o: IsDataclass | datetime | date | time) -> str | typing.Any:
        """Conversion used by default.

        Args:
            o: The object the encoder could not serialise on its own.

        Returns:
            The ISO format text for datetime, date and time objects;
            anything else is passed to the base class implementation.
        """
        if isinstance(o, datetime | date | time):
            return o.isoformat()

        return super().default(o)
A custom json encoder.
def
default( self, o: IsDataclass | datetime.datetime | datetime.date | datetime.time) -> Union[str, Any]:
203 def default(self, o: IsDataclass | datetime | date | time) -> str | typing.Any: 204 """Conversion used by default.""" 205 if isinstance(o, datetime | date | time): 206 return o.isoformat() 207 208 return super().default(o)
Conversion used by default.
@beartype
@dataclasses.dataclass
class
XmlElement:
@beartype
@dataclasses.dataclass
class XmlElement:
    """A simple xml element.

    <tag attrib>text<child/>...</tag>tail
    """

    # Each entry is unpacked as three strings; to_dict uses the second as
    # the attribute name and the third as its value.
    attrib: typing.Collection[tuple[str, str, str]]
    tag: str
    name_space: str
    text: str
    tail: str
    children: typing.Collection[XmlElement]

    def to_dict(self) -> dict[str, typing.Any]:
        """Convert xml element to a dict.

        Returns:
            A dict with ``name`` and, only when non-empty, ``value``
            (text and tail joined by a space), ``attributes`` and
            ``children``.
        """
        result: dict[str, typing.Any] = {"name": self.tag.strip()}

        value = ((self.text or "").strip() + " " + (self.tail or "").strip()).strip()
        if value:
            result["value"] = value

        # The first item of each attribute entry is not included in the dict.
        attributes = {k.strip(): (v or "").strip() for _ns, k, v in self.attrib}
        if attributes:
            result["attributes"] = attributes

        children = [i.to_dict() for i in self.children]
        if children:
            result["children"] = children

        return result

    def __str__(self) -> str:
        """Convert to a string.

        Returns:
            A one-line summary showing the tag, text and tail, with the
            attributes and children shown as counts.
        """
        tag1 = (self.tag or "").strip()
        tag2 = f"</{tag1}>"
        text = (self.text or "").strip()
        tail = (self.tail or "").strip()

        count = len(self.children)
        if count == 0:
            children = ""
        elif count == 1:
            children = "(1 child)"
        else:
            children = f"({count} children)"

        if text and children:
            children = " " + children

        # With no content at all there is no closing tag.
        if not text and not children:
            tag2 = ""

        count_attrib = len(self.attrib)
        if count_attrib == 0:
            attrib = ""
        elif count_attrib == 1:
            attrib = " (1 attribute)"
        else:
            # Report the attribute count, not the child count.
            attrib = f" ({count_attrib} attributes)"

        return f"<{tag1}{attrib}>{text}{children}{tag2}{tail}"
A simple xml element.
XmlElement( attrib: Collection[tuple[str, str, str]], tag: str, name_space: str, text: str, tail: str, children: Collection[XmlElement])
children: Collection[XmlElement]
def
to_dict(self) -> dict[str, typing.Any]:
def to_dict(self) -> dict[str, typing.Any]:
    """Convert xml element to a dict.

    Returns:
        A dict with ``name`` and, only when non-empty, ``value``
        (text and tail joined by a space), ``attributes`` and
        ``children``.
    """
    result: dict[str, typing.Any] = {"name": self.tag.strip()}

    value = ((self.text or "").strip() + " " + (self.tail or "").strip()).strip()
    if value:
        result["value"] = value

    # The first item of each attribute entry is not included in the dict.
    attributes = {k.strip(): (v or "").strip() for _ns, k, v in self.attrib}
    if attributes:
        result["attributes"] = attributes

    children = [i.to_dict() for i in self.children]
    if children:
        result["children"] = children

    return result
Convert xml element to a dict.
def xml_to_element(element: Element) -> XmlElement:
    """Convert an xml element and its children into nested XmlElements.

    Args:
        element: The element to convert.

    Returns:
        The converted element, with the namespace split from the tag and
        from each attribute name.
    """
    attrib = element.attrib or {}
    tag_ns, tag_name = xml_tag_ns(element.tag)

    children = [xml_to_element(child) for child in element]
    attrib_ns = [(*xml_tag_ns(key), value) for key, value in attrib.items()]

    return XmlElement(
        attrib=attrib_ns,
        tag=tag_name,
        name_space=tag_ns,
        text=element.text or "",
        tail=element.tail or "",
        children=children,
    )
Convert an xml element and its children into nested XmlElements.
def
xml_tag_ns(value: str) -> tuple[str, str]:
def xml_tag_ns(value: str) -> tuple[str, str]:
    """Get the XML namespace and name.

    Args:
        value: The combined namespace and name

    Returns:
        The separate namespace and name
    """
    if "}" in value:
        # Split on the last closing brace, e.g. '{namespace}name'.
        name_space, name = value.rsplit("}", maxsplit=1)
        name_space = name_space.strip("{}")
    else:
        name_space = ""
        name = value

    return name_space, name
Get the XML namespace and name.
Arguments:
- value: The combined namespace and name
Returns:
The separate namespace and name
@beartype
class
LeafFocusError(Exception):
A custom error for leaf focus.