# Source code for rdflib.compat

"""
Utility functions and objects that ease Python 2/3 compatibility
and smooth over differences between versions of support libraries.
"""

import codecs
import re
import warnings
from typing import Match


def cast_bytes(s, enc="utf-8"):
    """Return *s* as bytes when it is a ``str``; otherwise return it as-is.

    :param s: Value to convert. Only ``str`` instances are encoded; any
        other object is passed through untouched.
    :param enc: Codec name handed to ``str.encode`` (default ``"utf-8"``).
    :return: ``s.encode(enc)`` for ``str`` input, else *s* itself.
    """
    return s.encode(enc) if isinstance(s, str) else s


def ascii(stream):
    """Wrap *stream* in a ``codecs`` stream reader that decodes ASCII.

    NOTE: the name shadows the built-in ``ascii`` in any namespace that
    imports it; it is left unchanged so existing callers keep working.

    :param stream: Object handed to the reader class returned by
        ``codecs.getreader("ascii")``.
    :return: The stream reader wrapping *stream*.
    """
    reader_factory = codecs.getreader("ascii")
    return reader_factory(stream)


def bopen(*args, **kwargs):
    """Open a file for binary reading.

    Every argument is forwarded to the built-in ``open`` with ``mode``
    fixed to ``"rb"``. Because ``mode`` is always supplied here, passing it
    again (positionally as the second argument or by keyword) raises
    ``TypeError``.

    :return: The file object returned by ``open``; the caller is
        responsible for closing it.
    """
    return open(*args, mode="rb", **kwargs)


# Alias of the built-in ``int``. Presumably retained so code written against
# Python 2's separate ``long`` type keeps working (the module docstring cites
# Python 2/3 compatibility) -- confirm there are no users before removing.
long_type = int


def sign(n):
    """Return the sign of *n* as ``-1``, ``0`` or ``1``.

    :param n: Any value that can be ordered against ``0``.
    :return: ``-1`` if ``n < 0``, ``1`` if ``n > 0``, otherwise ``0``
        (which includes values such as NaN that are neither).
    """
    if n < 0:
        return -1
    return 1 if n > 0 else 0


# Matches a backslash followed by ``u`` and four hex digits, or by ``U`` and
# eight hex digits.
r_unicodeEscape = re.compile(r"(\\u[0-9A-Fa-f]{4}|\\U[0-9A-Fa-f]{8})")


def _unicodeExpand(s):
    """Replace every ``\\uXXXX`` / ``\\UXXXXXXXX`` escape in *s* with its character."""

    def _to_char(found):
        # Drop the leading backslash and the u/U marker; the rest is hex.
        hex_digits = found.group(0)[2:]
        return chr(int(hex_digits, 16))

    return r_unicodeEscape.sub(_to_char, s)


def decodeStringEscape(s):
    r"""Replace a fixed set of backslash escapes in *s* with their characters.

    Deprecated: every call emits a ``DeprecationWarning``.

    The escapes ``\t``, ``\n``, ``\r``, ``\b``, ``\f``, ``\"``, ``\'`` and
    ``\\`` are substituted one after another with ``str.replace``. Because
    these are sequential passes rather than a single scan, an escaped
    backslash followed by one of those letters (for example ``\\n``) is
    decoded incorrectly. That behaviour is kept as-is for backward
    compatibility. ``\uXXXX`` / ``\UXXXXXXXX`` escapes are not expanded.

    :param s: Text containing backslash escapes; must be a ``str`` because
        the replacements use ``str`` arguments.
    :return: *s* with the listed escapes replaced.
    """
    warnings.warn(
        DeprecationWarning(
            "rdflib.compat.decodeStringEscape() is deprecated, "
            "it will be removed in rdflib 7.0.0. "
            "This function is not used anywhere in rdflib anymore "
            "and the utility that it does provide is not implemented correctly."
        ),
        # Attribute the warning to the caller rather than to this module.
        stacklevel=2,
    )

    # Order matters: the escaped backslash must be handled last, exactly as
    # the original chain of replace() calls did.
    replacements = (
        ("\\t", "\t"),
        ("\\n", "\n"),
        ("\\r", "\r"),
        ("\\b", "\b"),
        ("\\f", "\f"),
        ('\\"', '"'),
        ("\\'", "'"),
        ("\\\\", "\\"),
    )
    for escape, char in replacements:
        s = s.replace(escape, char)
    return s


# Maps the character that follows a backslash in a single-character escape
# to the character the escape stands for.
_string_escape_map = {
    "t": "\t",
    "b": "\b",
    "n": "\n",
    "r": "\r",
    "f": "\f",
    '"': '"',
    "'": "'",
    "\\": "\\",
}


def _turtle_escape_subber(match: Match[str]) -> str:
    """Return the replacement text for one ``_turtle_escape_pattern`` match.

    Exactly one of the pattern's two groups is populated: the first holds a
    single-character escape code, the second a ``u``/``U`` marker followed
    by hex digits.
    """
    simple_code, numeric_code = match.groups()
    if simple_code is None:
        # Skip the u/U marker; what remains is the code point in hex.
        code_point = int(numeric_code[1:], 16)
        return chr(code_point)
    return _string_escape_map[simple_code]


# Group 1: one of the single-character escape codes.
# Group 2: ``u`` + 4 hex digits or ``U`` + 8 hex digits.
_turtle_escape_pattern = re.compile(
    r"""\\(?:([tbnrf"'\\])|(u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8}))""",
)


def decodeUnicodeEscape(escaped: str) -> str:
    r"""Decode the backslash escapes in *escaped* in a single pass.

    Each match of ``_turtle_escape_pattern`` is replaced by the result of
    ``_turtle_escape_subber``: the single-character escapes ``\t``, ``\b``,
    ``\n``, ``\r``, ``\f``, ``\"``, ``\'``, ``\\`` and the numeric escapes
    ``\uXXXX`` / ``\UXXXXXXXX``. Backslash sequences the pattern does not
    match are left unchanged.

    :param escaped: Text that may contain backslash escapes.
    :return: The decoded text; *escaped* itself when it has no backslash.
    :raises ValueError: If a ``\UXXXXXXXX`` escape names a code point that
        ``chr`` rejects (above ``0x10FFFF``).
    """
    # Fast path: most strings contain no backslash, so skip the regex.
    if "\\" in escaped:
        return _turtle_escape_pattern.sub(_turtle_escape_subber, escaped)
    return escaped