Source code for rdflib.plugins.shared.jsonld.util

# https://github.com/RDFLib/rdflib-jsonld/blob/feature/json-ld-1.1/rdflib_jsonld/util.py
from __future__ import annotations

import pathlib
from typing import IO, TYPE_CHECKING, Any, Optional, TextIO, Tuple, Union

if TYPE_CHECKING:
    import json
else:
    try:
        import json

        assert json  # workaround for pyflakes issue #13
    except ImportError:
        import simplejson as json

from io import TextIOBase, TextIOWrapper
from posixpath import normpath, sep
from urllib.parse import urljoin, urlsplit, urlunsplit

from rdflib.parser import (
    BytesIOWrapper,
    InputSource,
    PythonInputSource,
    StringInputSource,
    URLInputSource,
    create_input_source,
)


[docs]def source_to_json( source: Optional[ Union[IO[bytes], TextIO, InputSource, str, bytes, pathlib.PurePath] ] ) -> Optional[Any]: if isinstance(source, PythonInputSource): return source.data if isinstance(source, StringInputSource): return json.load(source.getCharacterStream()) # TODO: conneg for JSON (fix support in rdflib's URLInputSource!) source = create_input_source(source, format="json-ld") stream = source.getByteStream() try: if isinstance(stream, BytesIOWrapper): stream = stream.wrapped # Use character stream as-is, or interpret byte stream as UTF-8 if isinstance(stream, TextIOBase): use_stream = stream else: use_stream = TextIOWrapper(stream, encoding="utf-8") return json.load(use_stream) finally: stream.close()
VOCAB_DELIMS = ("#", "/", ":")
[docs]def split_iri(iri: str) -> Tuple[str, Optional[str]]: for delim in VOCAB_DELIMS: at = iri.rfind(delim) if at > -1: return iri[: at + 1], iri[at + 1 :] return iri, None
[docs]def norm_url(base: str, url: str) -> str: """ >>> norm_url('http://example.org/', '/one') 'http://example.org/one' >>> norm_url('http://example.org/', '/one#') 'http://example.org/one#' >>> norm_url('http://example.org/one', 'two') 'http://example.org/two' >>> norm_url('http://example.org/one/', 'two') 'http://example.org/one/two' >>> norm_url('http://example.org/', 'http://example.net/one') 'http://example.net/one' >>> norm_url('http://example.org/', 'http://example.org//one') 'http://example.org//one' """ if "://" in url: return url parts = urlsplit(urljoin(base, url)) path = normpath(parts[2]) if sep != "/": path = "/".join(path.split(sep)) if parts[2].endswith("/") and not path.endswith("/"): path += "/" result = urlunsplit(parts[0:2] + (path,) + parts[3:]) if url.endswith("#") and not result.endswith("#"): result += "#" return result
# type error: Missing return statement
[docs]def context_from_urlinputsource(source: URLInputSource) -> Optional[str]: # type: ignore[return] """ Please note that JSON-LD documents served with the application/ld+json media type MUST have all context information, including references to external contexts, within the body of the document. Contexts linked via a http://www.w3.org/ns/json-ld#context HTTP Link Header MUST be ignored for such documents. """ if source.content_type != "application/ld+json": try: # source.links is the new way of getting Link headers from URLInputSource links = source.links except AttributeError: # type error: Return value expected return # type: ignore[return-value] for link in links: if ' rel="http://www.w3.org/ns/json-ld#context"' in link: i, j = link.index("<"), link.index(">") if i > -1 and j > -1: # type error: Value of type variable "AnyStr" of "urljoin" cannot be "Optional[str]" return urljoin(source.url, link[i + 1 : j]) # type: ignore[type-var]
__all__ = [ "json", "source_to_json", "split_iri", "norm_url", "context_from_urlinputsource", ]