Edit on GitHub

leaf_focus.utils

Small utility functions.

  1"""Small utility functions."""
  2
  3from __future__ import annotations
  4
  5import dataclasses
  6import json
  7import logging
  8import pathlib
  9import platform
 10import re
 11import unicodedata
 12
 13from datetime import date, datetime, time, timezone
 14from enum import Enum
 15from xml.etree.ElementTree import Element
 16
 17from beartype import beartype, typing
 18from importlib_metadata import PackageNotFoundError, distribution
 19from importlib_resources import as_file, files
 20
 21
 22logger = logging.getLogger(__name__)
 23
 24
 25@beartype
 26def get_name_dash() -> str:
 27    """Get the package name with word separated by dashes."""
 28    return "leaf-focus"
 29
 30
 31@beartype
 32def get_name_under() -> str:
 33    """Get the package name with word separated by underscores."""
 34    return "leaf_focus"
 35
 36
 37@beartype
 38def get_version() -> str | None:
 39    """Get the package version."""
 40    try:
 41        dist = distribution(get_name_dash())
 42    except PackageNotFoundError:
 43        pass
 44
 45    else:
 46        return str(dist.version)
 47
 48    try:
 49        with as_file(files(get_name_under()).joinpath("cli.py")) as file_path:
 50            version_text = (file_path.parent.parent.parent / "VERSION").read_text()
 51            return str(version_text.strip())
 52    except FileNotFoundError:
 53        pass
 54
 55    return None
 56
 57
 58@beartype
 59def parse_date(value: str) -> datetime | None:
 60    """Parse a date from a string."""
 61    formats = [
 62        # e.g. 'Thu Aug 13 11:09:00 2020'
 63        "%a %b %d %H:%M:%S %Y",
 64        # e.g. '2011-11-04T00:05:23Z'
 65        "%Y-%m-%dT%H:%M:%SZ",
 66    ]
 67    for fmt in formats:
 68        try:
 69            return datetime.strptime(value, fmt).replace(tzinfo=timezone.utc)
 70        except ValueError:  # noqa: PERF203
 71            logger.debug("Value '%s' did not match date format '%s'.", value, fmt)
 72    return None
 73
 74
 75@beartype
 76def validate(
 77    name: str, value: str | int | None, expected: typing.Iterable[str | int | None]
 78) -> None:
 79    """Validate that a value is one of the expected values."""
 80    if value is not None and value not in expected:
 81        opts = ", ".join(sorted([str(i) for i in expected]))
 82        msg = f"Invalid {name} '{value}'. Expected one of '{opts}'."
 83        raise LeafFocusError(msg)
 84
 85
 86@beartype
 87class ValidatePathMethod(Enum):
 88    """Options for how to validate a path."""
 89
 90    NO_OPINION = 0
 91    MUST_EXIST = 1
 92
 93
 94@beartype
 95def validate_path(
 96    name: str,
 97    value: pathlib.Path,
 98    must_exist: ValidatePathMethod = ValidatePathMethod.NO_OPINION,
 99) -> pathlib.Path:
100    """Validate a path."""
101    if not value:
102        msg = f"Must provide path {name}."
103        raise LeafFocusError(msg)
104
105    try:
106        if must_exist == ValidatePathMethod.MUST_EXIST:
107            abs_path = value.resolve(strict=True)
108        else:
109            abs_path = value.absolute()
110
111    except Exception as error:
112        msg = f"Invalid path '{value}'."
113        raise LeafFocusError(msg) from error
114
115    else:
116        return abs_path
117
118
@beartype
def validate_pages(first_page: int | None, last_page: int | None) -> None:
    """Check that the page range is ordered.

    The check only applies when both ends of the range are given.

    Args:
        first_page: The first page.
        last_page: The last page.

    Returns:
        None

    Raises:
        LeafFocusError: If the first page is greater than the last page.
    """
    both_given = first_page is not None and last_page is not None
    if both_given and first_page > last_page:
        msg = (
            f"First page ({first_page}) must be less than or equal "
            f"to last page ({last_page})."
        )
        raise LeafFocusError(msg)
138
139
@beartype
def select_exe(value: pathlib.Path) -> pathlib.Path:
    """Pick the executable path for the current platform.

    On Windows the suffix is replaced with ``.exe``; on other platforms the
    path is used as given.

    Args:
        value: The path to the executable.

    Returns:
        The platform-specific path to the executable.

    Raises:
        LeafFocusError: If the selected path does not exist.
    """
    on_windows = platform.system() == "Windows"
    candidate = value.with_suffix(".exe") if on_windows else value

    if candidate.exists():
        return candidate

    msg = f"Exe file not found '{candidate}'."
    raise LeafFocusError(msg) from FileNotFoundError(candidate)
151
152
@beartype
def output_root(
    input_file: pathlib.Path,
    output_type: str,
    output_path: pathlib.Path,
    additional: typing.Collection[str] | None = None,
) -> pathlib.Path:
    """Build the path to the output.

    The output name is made from the input file stem, the output type and
    any additional parts. Each part is normalised with ``str_norm`` and the
    parts are joined with dashes.

    Args:
        input_file: The input file; only its stem is used.
        output_type: The kind of output.
        output_path: The directory the output name is appended to.
        additional: Optional extra name parts.

    Returns:
        The output directory joined with the generated name.
    """
    raw_parts = [input_file.stem, output_type, *(additional or [])]

    slugs = []
    for part in raw_parts:
        if part is None:
            continue
        text = str(part)
        # Skip parts that are empty or contain only whitespace.
        if not text.strip():
            continue
        slugs.append(str_norm(text.strip("-")))

    return output_path / "-".join(slugs)
171
172
# Matches any character that is not a word character, whitespace or a dash.
_slug_re_1 = re.compile(r"[^\w\s-]")
# Matches a run of dashes and/or whitespace.
_slug_re_2 = re.compile(r"[-\s]+")


@beartype
def str_norm(value: str) -> str:
    """Normalise a string into the 'slug' format.

    The text is NFKD-normalised and round-tripped through UTF-8 (ignoring
    anything that cannot be encoded). Characters other than word
    characters, whitespace and dashes are removed, the result is trimmed
    and lower-cased, and each run of dashes/whitespace becomes one dash.

    Args:
        value: The text to normalise.

    Returns:
        The slug form of the text.
    """
    decomposed = unicodedata.normalize("NFKD", value)
    round_tripped = decomposed.encode("utf-8", "ignore").decode("utf-8")
    cleaned = _slug_re_1.sub("", round_tripped).strip().lower()
    return _slug_re_2.sub("-", cleaned)
190
191
class IsDataclass(typing.Protocol):
    """Structural type used to annotate values that are dataclasses."""

    # The class-level field mapping that this protocol requires.
    __dataclass_fields__: typing.ClassVar[dict[str, typing.Any]]
196
197
@beartype
class CustomJsonEncoder(json.JSONEncoder):
    """A custom json encoder.

    Adds support for datetime, date and time values (as ISO 8601 text) and
    for dataclass instances (as dicts).
    """

    def default(self, o: IsDataclass | datetime | date | time) -> str | typing.Any:
        """Convert an object the standard encoder cannot handle.

        Args:
            o: The object to convert.

        Returns:
            The ISO format string for datetime, date and time values, or a
            dict for dataclass instances.

        Raises:
            TypeError: Raised by the base encoder for unsupported objects.
        """
        if isinstance(o, (datetime, date, time)):
            return o.isoformat()

        # The signature accepts dataclasses, so serialise instances rather
        # than letting them fall through to the base class TypeError.
        # Dataclass *types* are excluded because asdict needs an instance.
        if dataclasses.is_dataclass(o) and not isinstance(o, type):
            return dataclasses.asdict(o)

        return super().default(o)
208
209
@beartype
@dataclasses.dataclass
class XmlElement:
    """A simple xml element.

    <tag attrib>text<child/>...</tag>tail
    """

    # Attributes as three-item tuples; to_dict treats the second item as the
    # attribute name and the third as its value.
    attrib: typing.Collection[tuple[str, str, str]]
    # The element tag.
    tag: str
    # The namespace of the element.
    name_space: str
    # The text inside the element, before any child.
    text: str
    # The text that follows the element's closing tag.
    tail: str
    # The nested child elements.
    children: typing.Collection[XmlElement]

    def to_dict(self) -> dict[str, typing.Any]:
        """Convert xml element to a dict.

        Returns:
            A dict that always has "name", and has "value", "attributes"
            and "children" only when they are not empty.
        """
        result: dict[str, typing.Any] = {"name": self.tag.strip()}

        # The value is the text and the tail, joined by a single space.
        value = ((self.text or "").strip() + " " + (self.tail or "").strip()).strip()
        if value:
            result["value"] = value

        # The first tuple item is not included in the dict.
        attributes = {k.strip(): (v or "").strip() for _, k, v in self.attrib}
        if attributes:
            result["attributes"] = attributes

        children = [i.to_dict() for i in self.children]
        if children:
            result["children"] = children

        return result

    def __str__(self) -> str:
        """Convert to a string.

        Returns:
            A one-line summary showing the tag, the text and tail, and the
            number of attributes and children.
        """
        tag1 = (self.tag or "").strip()
        tag2 = f"</{tag1}>"
        text = (self.text or "").strip()
        tail = (self.tail or "").strip()

        count = len(self.children)
        if count == 0:
            children = ""
        elif count == 1:
            children = "(1 child)"
        else:
            children = f"({count} children)"

        if text and children:
            children = " " + children

        # No closing tag is shown for an element with no content.
        if not text and not children:
            tag2 = ""

        count_attrib = len(self.attrib)
        if count_attrib == 0:
            attrib = ""
        elif count_attrib == 1:
            attrib = " (1 attribute)"
        else:
            # Report the number of attributes, not the number of children.
            attrib = f" ({count_attrib} attributes)"

        return f"<{tag1}{attrib}>{text}{children}{tag2}{tail}"
273
274
@beartype
def xml_to_element(element: Element) -> XmlElement:
    """Convert an xml element and its descendants into an XmlElement.

    Args:
        element: The xml element to convert.

    Returns:
        The XmlElement for the element, with its children converted
        recursively.
    """
    nested = [xml_to_element(child) for child in element]

    name_space, name = xml_tag_ns(element.tag)

    # Each attribute becomes a (namespace, name, value) tuple.
    attributes = [
        (*xml_tag_ns(key), value) for key, value in (element.attrib or {}).items()
    ]

    return XmlElement(
        attrib=attributes,
        tag=name,
        name_space=name_space,
        text=element.text or "",
        tail=element.tail or "",
        children=nested,
    )
302
303
@beartype
def xml_tag_ns(value: str) -> tuple[str, str]:
    """Split a combined XML name into its namespace and name.

    Args:
        value: The combined namespace and name

    Returns:
        The separate namespace and name. The namespace is an empty string
        when the value contains no closing brace.
    """
    if "}" not in value:
        return "", value

    # Split at the last closing brace; the braces are removed from the namespace.
    prefix, _, local_name = value.rpartition("}")
    return prefix.strip("{}"), local_name
322
323
@beartype
class LeafFocusError(Exception):
    """The exception type raised by leaf focus code."""
logger = <Logger leaf_focus.utils (WARNING)>
def get_name_dash() -> str:
27def get_name_dash() -> str:
28    """Get the package name with word separated by dashes."""
29    return "leaf-focus"

Get the package name with words separated by dashes.

def get_name_under() -> str:
33def get_name_under() -> str:
34    """Get the package name with word separated by underscores."""
35    return "leaf_focus"

Get the package name with words separated by underscores.

def get_version() -> str | None:
39def get_version() -> str | None:
40    """Get the package version."""
41    try:
42        dist = distribution(get_name_dash())
43    except PackageNotFoundError:
44        pass
45
46    else:
47        return str(dist.version)
48
49    try:
50        with as_file(files(get_name_under()).joinpath("cli.py")) as file_path:
51            version_text = (file_path.parent.parent.parent / "VERSION").read_text()
52            return str(version_text.strip())
53    except FileNotFoundError:
54        pass
55
56    return None

Get the package version.

def parse_date(value: str) -> datetime.datetime | None:
60def parse_date(value: str) -> datetime | None:
61    """Parse a date from a string."""
62    formats = [
63        # e.g. 'Thu Aug 13 11:09:00 2020'
64        "%a %b %d %H:%M:%S %Y",
65        # e.g. '2011-11-04T00:05:23Z'
66        "%Y-%m-%dT%H:%M:%SZ",
67    ]
68    for fmt in formats:
69        try:
70            return datetime.strptime(value, fmt).replace(tzinfo=timezone.utc)
71        except ValueError:  # noqa: PERF203
72            logger.debug("Value '%s' did not match date format '%s'.", value, fmt)
73    return None

Parse a date from a string.

def validate( name: str, value: str | int | None, expected: Iterable[str | int | None]) -> None:
77def validate(
78    name: str, value: str | int | None, expected: typing.Iterable[str | int | None]
79) -> None:
80    """Validate that a value is one of the expected values."""
81    if value is not None and value not in expected:
82        opts = ", ".join(sorted([str(i) for i in expected]))
83        msg = f"Invalid {name} '{value}'. Expected one of '{opts}'."
84        raise LeafFocusError(msg)

Validate that a value is one of the expected values.

@beartype
class ValidatePathMethod(enum.Enum):
87@beartype
88class ValidatePathMethod(Enum):
89    """Options for how to validate a path."""
90
91    NO_OPINION = 0
92    MUST_EXIST = 1

Options for how to validate a path.

NO_OPINION = <ValidatePathMethod.NO_OPINION: 0>
MUST_EXIST = <ValidatePathMethod.MUST_EXIST: 1>
def validate_path( name: str, value: pathlib.Path, must_exist: ValidatePathMethod = <ValidatePathMethod.NO_OPINION: 0>) -> pathlib.Path:
 96def validate_path(
 97    name: str,
 98    value: pathlib.Path,
 99    must_exist: ValidatePathMethod = ValidatePathMethod.NO_OPINION,
100) -> pathlib.Path:
101    """Validate a path."""
102    if not value:
103        msg = f"Must provide path {name}."
104        raise LeafFocusError(msg)
105
106    try:
107        if must_exist == ValidatePathMethod.MUST_EXIST:
108            abs_path = value.resolve(strict=True)
109        else:
110            abs_path = value.absolute()
111
112    except Exception as error:
113        msg = f"Invalid path '{value}'."
114        raise LeafFocusError(msg) from error
115
116    else:
117        return abs_path

Validate a path.

def validate_pages(first_page: int | None, last_page: int | None) -> None:
121def validate_pages(first_page: int | None, last_page: int | None) -> None:
122    """Validate the page range.
123
124    Args:
125        first_page: The first page.
126        last_page: The last page.
127
128    Returns:
129        None
130    """
131    if first_page is None or last_page is None:
132        return
133    if first_page > last_page:
134        msg = (
135            f"First page ({first_page}) must be less than or equal "
136            f"to last page ({last_page})."
137        )
138        raise LeafFocusError(msg)

Validate the page range.

Arguments:
  • first_page: The first page.
  • last_page: The last page.
Returns:

None

def select_exe(value: pathlib.Path) -> pathlib.Path:
142def select_exe(value: pathlib.Path) -> pathlib.Path:
143    """Select the executable path based on the platform."""
144    if platform.system() == "Windows":
145        value = value.with_suffix(".exe")
146
147    if not value.exists():
148        msg = f"Exe file not found '{value}'."
149        raise LeafFocusError(msg) from FileNotFoundError(value)
150
151    return value

Select the executable path based on the platform.

def output_root( input_file: pathlib.Path, output_type: str, output_path: pathlib.Path, additional: Collection[str] | None = None) -> pathlib.Path:
155def output_root(
156    input_file: pathlib.Path,
157    output_type: str,
158    output_path: pathlib.Path,
159    additional: typing.Collection[str] | None = None,
160) -> pathlib.Path:
161    """Build the path to the output."""
162    name_parts = [input_file.stem, output_type]
163    name_parts.extend(additional or [])
164    name_parts = [str(i) for i in name_parts if i is not None]
165    name_parts = [str_norm(i.strip("-")) for i in name_parts if i and i.strip()]
166
167    name = "-".join(name_parts)
168
169    output = output_path / name
170
171    return output

Build the path to the output.

def str_norm(value: str) -> str:
179def str_norm(value: str) -> str:
180    """Normalise a string into the 'slug' format."""
181    separator = "-"
182    encoding = "utf-8"
183
184    norm = unicodedata.normalize("NFKD", value)
185    enc = norm.encode(encoding, "ignore")
186    de_enc = enc.decode(encoding)
187    alpha_num_only = _slug_re_1.sub("", de_enc)
188    alpha_num_tidy = alpha_num_only.strip().lower()
189    result = _slug_re_2.sub(separator, alpha_num_tidy)
190    return result

Normalise a string into the 'slug' format.

class IsDataclass(beartype.typing.Protocol):
193class IsDataclass(typing.Protocol):
194    """A protocol to allow typing for dataclasses."""
195
196    __dataclass_fields__: typing.ClassVar[dict[str, typing.Any]]

A protocol to allow typing for dataclasses.

@beartype
class CustomJsonEncoder(json.encoder.JSONEncoder):
199@beartype
200class CustomJsonEncoder(json.JSONEncoder):
201    """A custom json encoder."""
202
203    def default(self, o: IsDataclass | datetime | date | time) -> str | typing.Any:
204        """Conversion used by default."""
205        if isinstance(o, datetime | date | time):
206            return o.isoformat()
207
208        return super().default(o)

A custom json encoder.

def default( self, o: IsDataclass | datetime.datetime | datetime.date | datetime.time) -> Union[str, Any]:
203    def default(self, o: IsDataclass | datetime | date | time) -> str | typing.Any:
204        """Conversion used by default."""
205        if isinstance(o, datetime | date | time):
206            return o.isoformat()
207
208        return super().default(o)

Conversion used by default.

@beartype
@dataclasses.dataclass
class XmlElement:
211@beartype
212@dataclasses.dataclass
213class XmlElement:
214    """A simple xml element.
215
216    <tag attrib>text<child/>...</tag>tail
217    """
218
219    attrib: typing.Collection[tuple[str, str, str]]
220    tag: str
221    name_space: str
222    text: str
223    tail: str
224    children: typing.Collection[XmlElement]
225
226    def to_dict(self) -> dict[str, typing.Any]:
227        """Convert xml element to a dict."""
228        result: dict[str, typing.Any] = {"name": self.tag.strip()}
229
230        value = ((self.text or "").strip() + " " + (self.tail or "").strip()).strip()
231        if value:
232            result["value"] = value
233
234        attributes = {k.strip(): (v or "").strip() for n, k, v in self.attrib}
235        if attributes:
236            result["attributes"] = attributes
237
238        children = [i.to_dict() for i in self.children]
239        if children:
240            result["children"] = children
241
242        return result
243
244    def __str__(self) -> str:
245        """Convert to a string."""
246        tag1 = (self.tag or "").strip()
247        tag2 = f"</{tag1}>"
248        text = (self.text or "").strip()
249        tail = (self.tail or "").strip()
250
251        count = len(self.children)
252        if count == 0:
253            children = ""
254        elif count == 1:
255            children = "(1 child)"
256        else:
257            children = f"({count} children)"
258
259        if text and children:
260            children = " " + children
261
262        if not text and not children:
263            tag2 = ""
264
265        count_attrib = len(self.attrib)
266        if count_attrib == 0:
267            attrib = ""
268        elif count_attrib == 1:
269            attrib = " (1 attribute)"
270        else:
271            attrib = f" ({count} attributes)"
272
273        return f"<{tag1}{attrib}>{text}{children}{tag2}{tail}"

A simple xml element.

<tag attrib>text<child/>...</tag>tail

XmlElement( attrib: Collection[tuple[str, str, str]], tag: str, name_space: str, text: str, tail: str, children: Collection[XmlElement])
attrib: Collection[tuple[str, str, str]]
tag: str
name_space: str
text: str
tail: str
children: Collection[XmlElement]
def to_dict(self) -> dict[str, typing.Any]:
226    def to_dict(self) -> dict[str, typing.Any]:
227        """Convert xml element to a dict."""
228        result: dict[str, typing.Any] = {"name": self.tag.strip()}
229
230        value = ((self.text or "").strip() + " " + (self.tail or "").strip()).strip()
231        if value:
232            result["value"] = value
233
234        attributes = {k.strip(): (v or "").strip() for n, k, v in self.attrib}
235        if attributes:
236            result["attributes"] = attributes
237
238        children = [i.to_dict() for i in self.children]
239        if children:
240            result["children"] = children
241
242        return result

Convert xml element to a dict.

def xml_to_element(element: xml.etree.ElementTree.Element) -> XmlElement:
277def xml_to_element(element: Element) -> XmlElement:
278    """Convert xml into nested dicts."""
279    attrib = element.attrib or {}
280    tag = element.tag
281    text = element.text
282    tail = element.tail
283
284    children = [xml_to_element(child) for child in element]
285
286    tag_ns, tag_name = xml_tag_ns(tag)
287
288    attrib_ns = []
289    for key, value in attrib.items():
290        extracted_ns, extracted_tag = xml_tag_ns(key)
291        attrib_ns.append((extracted_ns, extracted_tag, value))
292
293    item = XmlElement(
294        attrib=attrib_ns,
295        tag=tag_name,
296        name_space=tag_ns,
297        text=text or "",
298        tail=tail or "",
299        children=children,
300    )
301
302    return item

Convert an xml element and its descendants into a nested XmlElement.

def xml_tag_ns(value: str) -> tuple[str, str]:
306def xml_tag_ns(value: str) -> tuple[str, str]:
307    """Get the XML namespace and name.
308
309    Args:
310        value: The combined namespace and name
311
312    Returns:
313        The separate namespace and name
314    """
315    if "}" in value:
316        name_space, name = value.rsplit("}", maxsplit=1)
317        name_space = name_space.strip("{}")
318    else:
319        name_space = ""
320        name = value
321
322    return name_space, name

Get the XML namespace and name.

Arguments:
  • value: The combined namespace and name
Returns:

The separate namespace and name

@beartype
class LeafFocusError(builtins.Exception):
325@beartype
326class LeafFocusError(Exception):
327    """A custom error for leaf focus."""

A custom error for leaf focus.