git_dag.utils

src/git_dag/utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
"""Misc utils."""

import codecs
import re
from datetime import datetime, timedelta


def escape_decode(text: str) -> str:
    """Resolve escaped escape sequences in ``text`` (e.g., ``\\\\n -> \\n``).

    Note
    -----
    This follows https://stackoverflow.com/a/37059682, which keeps unicode
    characters intact. FIXME: the approach depends on ``codecs.escape_decode``, an
    internal function (https://github.com/python/cpython/issues/74773).

    """
    raw = text.encode()
    # escape_decode returns a pair; only the decoded bytes (first item) are needed
    unescaped, _ = codecs.escape_decode(raw)  # type: ignore
    return unescaped.decode()


def transform_ascii_control_chars(text: str) -> str:
    """Replace selected ascii control characters with their caret notation.

    Note
    -----
    SVGs exported from graphviz cannot be displayed when they contain certain ascii
    control characters, hence they are rewritten (e.g., ``\\x01`` becomes ``^A``).

    """
    # \a \b \t \n \v \f \r (which correspond to ^G-^M) are left untouched
    # https://en.wikipedia.org/wiki/ASCII#Control_code_table
    code_points = (*range(0x01, 0x07), *range(0x0E, 0x1B))

    # caret notation: the control code shifted by 64 gives the printable letter
    caret_notation = {code: f"^{chr(code + 64)}" for code in code_points}
    return text.translate(caret_notation)


def creator_timestamp_format(
    data: str, fmt: str = "%a %b %d %H:%M:%S %Y"
) -> tuple[str, str, str]:
    """Format a creator (author/committer) and timestamp.

    Parameters
    ----------
    data
        String of the form ``NAME <EMAIL> TIMESTAMP [TIMEZONE]``, where ``TIMESTAMP``
        is an integer number of seconds since the epoch and ``TIMEZONE`` is an
        optional UTC offset (e.g., ``+0200``).
    fmt
        ``strftime`` format used to render the timestamp.

    Returns
    -------
    tuple[str, str, str]
        The name, the email (including the angle brackets) and the formatted date
        (followed by the timezone, when exactly one is given).

    Raises
    ------
    ValueError
        If ``data`` does not match the expected pattern or the timestamp is not an
        integer.

    Note
    -----
    The default format (``fmt``) is the same as the default format used by git. When
    a valid UTC offset is given, the date is rendered in that timezone so that the
    displayed time is consistent with the displayed offset and doesn't depend on the
    timezone of the local machine. Without a valid offset the local timezone is used.

    """

    def formatter(timestamp_timezone: str) -> str:
        """Convert a string containing a timestamp and maybe a timezone."""
        split = timestamp_timezone.split()
        seconds = int(split[0])
        if len(split) != 2:
            # no (unambiguous) timezone: render in local time, without a suffix
            return datetime.fromtimestamp(seconds).strftime(fmt)

        try:
            # "%z" parses offsets like +0200 into a fixed-offset tzinfo
            tzinfo = datetime.strptime(split[1], "%z").tzinfo
        except ValueError:
            # not a valid UTC offset: fall back to local time
            tzinfo = None

        date_time = datetime.fromtimestamp(seconds, tz=tzinfo).strftime(fmt)
        return f"{date_time} {split[1]}"

    match = re.search(r"(?P<name>.*) (?P<email><.*>) (?P<date>.*)", data)
    if match is None:
        raise ValueError("Creator pattern not matched.")

    return match.group("name"), match.group("email"), formatter(match.group("date"))


def increase_date(
    date: str,
    hours: int = 1,
    date_format: str = "%d/%m/%y %H:%M %z",
) -> str:
    """Shift a formatted date by ``hours`` hours.

    The date is parsed with ``date_format`` and the result is rendered using the
    same format.

    """
    shift = timedelta(hours=hours)
    shifted = datetime.strptime(date, date_format) + shift
    return shifted.strftime(date_format)