Source code for rdflib.plugins.sparql.sparql

from __future__ import annotations

import collections
import datetime
import itertools
import typing as t
from collections.abc import Mapping, MutableMapping
from typing import (
    TYPE_CHECKING,
    Any,
    Container,
    Dict,
    Generator,
    Iterable,
    List,
    Optional,
    Tuple,
    TypeVar,
    Union,
)

import isodate

import rdflib.plugins.sparql
from rdflib.graph import ConjunctiveGraph, Graph
from rdflib.namespace import NamespaceManager
from rdflib.plugins.sparql.parserutils import CompValue
from rdflib.term import BNode, Identifier, Literal, Node, URIRef, Variable

if TYPE_CHECKING:
    from rdflib.paths import Path


_AnyT = TypeVar("_AnyT")


[docs]class SPARQLError(Exception):
[docs]    def __init__(self, msg: Optional[str] = None):
        Exception.__init__(self, msg)


[docs]class NotBoundError(SPARQLError):
[docs]    def __init__(self, msg: Optional[str] = None):
        SPARQLError.__init__(self, msg)


[docs]class AlreadyBound(SPARQLError):  # noqa: N818
    """Raised when trying to bind a variable that is already bound!"""

[docs]    def __init__(self):
        SPARQLError.__init__(self)


[docs]class SPARQLTypeError(SPARQLError):
[docs]    def __init__(self, msg: Optional[str]):
        SPARQLError.__init__(self, msg)


[docs]class Bindings(MutableMapping):

    """

    A single level of a stack of variable-value bindings.
    Each dict keeps a reference to the dict below it,
    any failed lookup is propegated back

    In python 3.3 this could be a collections.ChainMap
    """

[docs]    def __init__(self, outer: Optional["Bindings"] = None, d=[]):
        self._d: Dict[str, str] = dict(d)
        self.outer = outer

[docs]    def __getitem__(self, key: str) -> str:
        if key in self._d:
            return self._d[key]

        if not self.outer:
            raise KeyError()
        return self.outer[key]

[docs]    def __contains__(self, key: Any) -> bool:
        try:
            self[key]
            return True
        except KeyError:
            return False

[docs]    def __setitem__(self, key: str, value: Any) -> None:
        self._d[key] = value

[docs]    def __delitem__(self, key: str) -> None:
        raise Exception("DelItem is not implemented!")

[docs]    def __len__(self) -> int:
        i = 0
        d: Optional[Bindings] = self
        while d is not None:
            i += len(d._d)
            d = d.outer
        return i

[docs]    def __iter__(self) -> Generator[str, None, None]:
        d: Optional[Bindings] = self
        while d is not None:
            yield from d._d
            d = d.outer

[docs]    def __str__(self) -> str:
        # type error: Generator has incompatible item type "Tuple[Any, str]"; expected "str"
        return "Bindings({" + ", ".join((k, self[k]) for k in self) + "})"  # type: ignore[misc]

[docs]    def __repr__(self) -> str:
        return str(self)


[docs]class FrozenDict(Mapping):
    """
    An immutable hashable dict

    Taken from http://stackoverflow.com/a/2704866/81121

    """

[docs]    def __init__(self, *args: Any, **kwargs: Any):
        self._d: Dict[Identifier, Identifier] = dict(*args, **kwargs)
        self._hash: Optional[int] = None

[docs]    def __iter__(self):
        return iter(self._d)

[docs]    def __len__(self) -> int:
        return len(self._d)

[docs]    def __getitem__(self, key: Identifier) -> Identifier:
        return self._d[key]

[docs]    def __hash__(self) -> int:
        # It would have been simpler and maybe more obvious to
        # use hash(tuple(sorted(self._d.items()))) from this discussion
        # so far, but this solution is O(n). I don't know what kind of
        # n we are going to run into, but sometimes it's hard to resist the
        # urge to optimize when it will gain improved algorithmic performance.
        if self._hash is None:
            self._hash = 0
            for key, value in self.items():
                self._hash ^= hash(key)
                self._hash ^= hash(value)
        return self._hash

[docs]    def project(self, vars: Container[Variable]) -> "FrozenDict":
        return FrozenDict((x for x in self.items() if x[0] in vars))

[docs]    def disjointDomain(self, other: t.Mapping[Identifier, Identifier]) -> bool:
        return not bool(set(self).intersection(other))

[docs]    def compatible(self, other: t.Mapping[Identifier, Identifier]) -> bool:
        for k in self:
            try:
                if self[k] != other[k]:
                    return False
            except KeyError:
                pass

        return True

[docs]    def merge(self, other: t.Mapping[Identifier, Identifier]) -> "FrozenDict":
        res = FrozenDict(itertools.chain(self.items(), other.items()))

        return res

[docs]    def __str__(self) -> str:
        return str(self._d)

[docs]    def __repr__(self) -> str:
        return repr(self._d)


[docs]class FrozenBindings(FrozenDict):
[docs]    def __init__(self, ctx: "QueryContext", *args, **kwargs):
        FrozenDict.__init__(self, *args, **kwargs)
        self.ctx = ctx

[docs]    def __getitem__(self, key: Union[Identifier, str]) -> Identifier:
        if not isinstance(key, Node):
            key = Variable(key)

        if not isinstance(key, (BNode, Variable)):
            return key

        if key not in self._d:
            # type error: Value of type "Optional[Dict[Variable, Identifier]]" is not indexable
            # type error: Invalid index type "Union[BNode, Variable]" for "Optional[Dict[Variable, Identifier]]"; expected type "Variable"
            return self.ctx.initBindings[key]  # type: ignore[index]
        else:
            return self._d[key]

[docs]    def project(self, vars: Container[Variable]) -> "FrozenBindings":
        return FrozenBindings(self.ctx, (x for x in self.items() if x[0] in vars))

[docs]    def merge(self, other: t.Mapping[Identifier, Identifier]) -> "FrozenBindings":
        res = FrozenBindings(self.ctx, itertools.chain(self.items(), other.items()))
        return res

    @property
    def now(self) -> datetime.datetime:
        return self.ctx.now

    @property
    def bnodes(self) -> t.Mapping[Identifier, BNode]:
        return self.ctx.bnodes

    @property
    def prologue(self) -> Optional["Prologue"]:
        return self.ctx.prologue

[docs]    def forget(
        self, before: "QueryContext", _except: Optional[Container[Variable]] = None
    ) -> FrozenBindings:
        """
        return a frozen dict only of bindings made in self
        since before
        """
        if not _except:
            _except = []

        # bindings from initBindings are newer forgotten
        return FrozenBindings(
            self.ctx,
            (
                x
                for x in self.items()
                if (
                    x[0] in _except
                    # type error: Unsupported right operand type for in ("Optional[Dict[Variable, Identifier]]")
                    or x[0] in self.ctx.initBindings  # type: ignore[operator]
                    or before[x[0]] is None
                )
            ),
        )

[docs]    def remember(self, these) -> FrozenBindings:
        """
        return a frozen dict only of bindings in these
        """
        return FrozenBindings(self.ctx, (x for x in self.items() if x[0] in these))


[docs]class QueryContext:
    """
    Query context - passed along when evaluating the query
    """

[docs]    def __init__(
        self,
        graph: Optional[Graph] = None,
        bindings: Optional[Union[Bindings, FrozenBindings, List[Any]]] = None,
        initBindings: Optional[Mapping[str, Identifier]] = None,
    ):
        self.initBindings = initBindings
        self.bindings = Bindings(d=bindings or [])
        if initBindings:
            self.bindings.update(initBindings)

        self.graph: Optional[Graph]
        self._dataset: Optional[ConjunctiveGraph]
        if isinstance(graph, ConjunctiveGraph):
            self._dataset = graph
            if rdflib.plugins.sparql.SPARQL_DEFAULT_GRAPH_UNION:
                self.graph = self.dataset
            else:
                self.graph = self.dataset.default_context
        else:
            self._dataset = None
            self.graph = graph

        self.prologue: Optional[Prologue] = None
        self._now: Optional[datetime.datetime] = None

        self.bnodes: t.MutableMapping[Identifier, BNode] = collections.defaultdict(
            BNode
        )

    @property
    def now(self) -> datetime.datetime:
        if self._now is None:
            self._now = datetime.datetime.now(isodate.tzinfo.UTC)
        return self._now

[docs]    def clone(
        self, bindings: Optional[Union[FrozenBindings, Bindings, List[Any]]] = None
    ) -> "QueryContext":
        r = QueryContext(
            self._dataset if self._dataset is not None else self.graph,
            bindings or self.bindings,
            initBindings=self.initBindings,
        )
        r.prologue = self.prologue
        r.graph = self.graph
        r.bnodes = self.bnodes
        return r

    @property
    def dataset(self) -> ConjunctiveGraph:
        """ "current dataset"""
        if self._dataset is None:
            raise Exception(
                "You performed a query operation requiring "
                + "a dataset (i.e. ConjunctiveGraph), but "
                + "operating currently on a single graph."
            )
        return self._dataset

[docs]    def load(self, source: URIRef, default: bool = False, **kwargs: Any) -> None:
        """
        Load data from the source into the query context's.

        :param source: The source to load from.
        :param default: If `True`, triples from the source will be added to the
            default graph, otherwise it will be loaded into a graph with
            ``source`` URI as its name.
        :param kwargs: Keyword arguments to pass to
            :meth:`rdflib.graph.Graph.parse`.
        """

        def _load(graph, source):
            try:
                return graph.parse(source, format="turtle", **kwargs)
            except Exception:
                pass
            try:
                return graph.parse(source, format="xml", **kwargs)
            except Exception:
                pass
            try:
                return graph.parse(source, format="n3", **kwargs)
            except Exception:
                pass
            try:
                return graph.parse(source, format="nt", **kwargs)
            except Exception:
                raise Exception(
                    "Could not load %s as either RDF/XML, N3 or NTriples" % source
                )

        if not rdflib.plugins.sparql.SPARQL_LOAD_GRAPHS:
            # we are not loading - if we already know the graph
            # being "loaded", just add it to the default-graph
            if default:
                # Unsupported left operand type for + ("None")
                self.graph += self.dataset.get_context(source)  # type: ignore[operator]
        else:
            if default:
                _load(self.graph, source)
            else:
                _load(self.dataset.get_context(source), source)

[docs]    def __getitem__(self, key: Union[str, Path]) -> Optional[Union[str, Path]]:
        # in SPARQL BNodes are just labels
        if not isinstance(key, (BNode, Variable)):
            return key
        try:
            return self.bindings[key]
        except KeyError:
            return None

[docs]    def get(self, key: str, default: Optional[Any] = None) -> Any:
        try:
            return self[key]
        except KeyError:
            return default

[docs]    def solution(self, vars: Optional[Iterable[Variable]] = None) -> FrozenBindings:
        """
        Return a static copy of the current variable bindings as dict
        """
        if vars:
            return FrozenBindings(
                self, ((k, v) for k, v in self.bindings.items() if k in vars)
            )
        else:
            return FrozenBindings(self, self.bindings.items())

[docs]    def __setitem__(self, key: str, value: str) -> None:
        if key in self.bindings and self.bindings[key] != value:
            raise AlreadyBound()

        self.bindings[key] = value

[docs]    def pushGraph(self, graph: Optional[Graph]) -> "QueryContext":
        r = self.clone()
        r.graph = graph
        return r

[docs]    def push(self) -> "QueryContext":
        r = self.clone(Bindings(self.bindings))
        return r

[docs]    def clean(self) -> "QueryContext":
        return self.clone([])

[docs]    def thaw(self, frozenbindings: FrozenBindings) -> "QueryContext":
        """
        Create a new read/write query context from the given solution
        """
        c = self.clone(frozenbindings)

        return c


[docs]class Prologue:
    """
    A class for holding prefixing bindings and base URI information
    """

[docs]    def __init__(self) -> None:
        self.base: Optional[str] = None
        self.namespace_manager = NamespaceManager(Graph())  # ns man needs a store

[docs]    def resolvePName(self, prefix: Optional[str], localname: Optional[str]) -> URIRef:
        ns = self.namespace_manager.store.namespace(prefix or "")
        if ns is None:
            raise Exception("Unknown namespace prefix : %s" % prefix)
        return URIRef(ns + (localname or ""))

[docs]    def bind(self, prefix: Optional[str], uri: Any) -> None:
        self.namespace_manager.bind(prefix, uri, replace=True)

[docs]    def absolutize(
        self, iri: Optional[Union[CompValue, str]]
    ) -> Optional[Union[CompValue, str]]:
        """
        Apply BASE / PREFIXes to URIs
        (and to datatypes in Literals)

        TODO: Move resolving URIs to pre-processing
        """

        if isinstance(iri, CompValue):
            if iri.name == "pname":
                return self.resolvePName(iri.prefix, iri.localname)
            if iri.name == "literal":
                # type error: Argument "datatype" to "Literal" has incompatible type "Union[CompValue, Identifier, None]"; expected "Optional[str]"
                return Literal(
                    iri.string, lang=iri.lang, datatype=self.absolutize(iri.datatype)  # type: ignore[arg-type]
                )
        elif isinstance(iri, URIRef) and not ":" in iri:  # noqa: E713
            return URIRef(iri, base=self.base)

        return iri


[docs]class Query:
    """
    A parsed and translated query
    """

[docs]    def __init__(self, prologue: Prologue, algebra: CompValue):
        self.prologue = prologue
        self.algebra = algebra
        self._original_args: Tuple[str, Mapping[str, str], Optional[str]]


[docs]class Update:
    """
    A parsed and translated update
    """

[docs]    def __init__(self, prologue: Prologue, algebra: List[CompValue]):
        self.prologue = prologue
        self.algebra = algebra
        self._original_args: Tuple[str, Mapping[str, str], Optional[str]]