Source code for rdflib.plugins.parsers.hext
"""
This is a rdflib plugin for parsing Hextuple files, which are Newline-Delimited JSON
(ndjson) files, into Conjunctive. The store that backs the graph *must* be able to
handle contexts, i.e. multiple graphs.
"""
import json
import warnings
from typing import List, Union
from rdflib import BNode, ConjunctiveGraph, Literal, URIRef
from rdflib.parser import Parser
__all__ = ["HextuplesParser"]
[docs]class HextuplesParser(Parser):
"""
An RDFLib parser for Hextuples
"""
[docs] def __init__(self):
pass
def _load_json_line(self, line: str):
# this complex handing is because the 'value' component is
# allowed to be "" but not None
# all other "" values are treated as None
ret1 = json.loads(line)
ret2 = [x if x != "" else None for x in ret1]
if ret1[2] == "":
ret2[2] = ""
return ret2
def _parse_hextuple(self, cg: ConjunctiveGraph, tup: List[Union[str, None]]):
# all values check
# subject, predicate, value, datatype cannot be None
# language and graph may be None
if tup[0] is None or tup[1] is None or tup[2] is None or tup[3] is None:
raise ValueError(
"subject, predicate, value, datatype cannot be None. Given: " f"{tup}"
)
# 1 - subject
s: Union[URIRef, BNode]
if tup[0].startswith("_"):
s = BNode(value=tup[0].replace("_:", ""))
else:
s = URIRef(tup[0])
# 2 - predicate
p = URIRef(tup[1])
# 3 - value
o: Union[URIRef, BNode, Literal]
if tup[3] == "globalId":
o = URIRef(tup[2])
elif tup[3] == "localId":
o = BNode(value=tup[2].replace("_:", ""))
else: # literal
if tup[4] is None:
o = Literal(tup[2], datatype=URIRef(tup[3]))
else:
o = Literal(tup[2], lang=tup[4])
# 6 - context
if tup[5] is not None:
c = URIRef(tup[5])
cg.add((s, p, o, c))
else:
cg.add((s, p, o))
[docs] def parse(self, source, graph, **kwargs):
if kwargs.get("encoding") not in [None, "utf-8"]:
warnings.warn(
f"Hextuples files are always utf-8 encoded, "
f"I was passed: {kwargs.get('encoding')}, "
"but I'm still going to use utf-8"
)
assert (
graph.store.context_aware
), "Hextuples Parser needs a context-aware store!"
cg = ConjunctiveGraph(store=graph.store, identifier=graph.identifier)
cg.default_context = graph
# handle different source types - only file and string (data) for now
if hasattr(source, "file"):
with open(source.file.name) as fp:
for l in fp:
self._parse_hextuple(cg, self._load_json_line(l))
elif hasattr(source, "_InputSource__bytefile"):
if hasattr(source._InputSource__bytefile, "wrapped"):
for l in source._InputSource__bytefile.wrapped.strip().splitlines():
self._parse_hextuple(cg, self._load_json_line(l))