Source code for rdflib.plugins.parsers.pyRdfa.rdfs.process

# -*- coding: utf-8 -*-
"""
@organization: U{World Wide Web Consortium<http://www.w3.org>}
@author: U{Ivan Herman<a href="http://www.w3.org/People/Ivan/">}
@license: This software is available for use under the
U{W3C® SOFTWARE NOTICE AND LICENSE<href="http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231">}

"""

"""
$Id: process.py,v 1.7 2012-03-23 14:06:38 ivan Exp $ $Date: 2012-03-23 14:06:38 $

"""

import sys
import os

import rdflib
from rdflib	import URIRef
from rdflib	import Literal
from rdflib	import BNode
from rdflib	import Namespace
if rdflib.__version__ >= "3.0.0" :
	from rdflib	import RDF  as ns_rdf
	from rdflib	import RDFS as ns_rdfs
	from rdflib	import Graph
else :
	from rdflib.RDFS	import RDFSNS as ns_rdfs
	from rdflib.RDF		import RDFNS  as ns_rdf
	from rdflib.Graph import Graph

ns_owl = Namespace("http://www.w3.org/2002/07/owl#")

from ..host import MediaTypes

from ..utils	import URIOpener

from . import err_outdated_cache
from . import err_unreachable_vocab
from . import err_unparsable_Turtle_vocab
from . import err_unparsable_xml_vocab
from . import err_unparsable_ntriples_vocab
from . import err_unparsable_rdfa_vocab
from . import err_unrecognised_vocab_type

from .. import VocabReferenceError

from .cache import CachedVocab
from .. import HTTPError, RDFaError

#############################################################################################################

[docs]def return_graph(uri, options, newCache = False) :
	"""Parse a file, and return an RDFLib Graph. The URI's content type is checked and either one of
	RDFLib's parsers is invoked (for the Turtle, RDF/XML, and N Triple cases) or a separate RDFa processing is invoked
	on the RDFa content.

	The Accept header of the HTTP request gives a preference to Turtle, followed by RDF/XML and then HTML (RDFa), in case content negotiation is used.

	This function is used to retreive the vocabulary file and turn it into an RDFLib graph.

	@param uri: URI for the graph
	@param options: used as a place where warnings can be sent
	@param newCache: in case this is used with caching, whether a new cache is generated; that modifies the warning text
	@return: A tuple consisting of an RDFLib Graph instance and an expiration date); None if the dereferencing or the parsing was unsuccessful
	"""
	def return_to_cache(msg) :
		if newCache :
			options.add_warning(err_unreachable_vocab % uri, warning_type=VocabReferenceError)
		else :
			options.add_warning(err_outdated_cache % uri, warning_type=VocabReferenceError)

	retval 			= None
	expiration_date = None
	content			= None

	try :
		content = URIOpener(uri,
							{'Accept' : 'text/html;q=0.8, application/xhtml+xml;q=0.8, text/turtle;q=1.0, application/rdf+xml;q=0.9'})
	except HTTPError :
		(type,value,traceback) = sys.exc_info()
		return_to_cache(value)
		return (None,None)
	except RDFaError :
		(type,value,traceback) = sys.exc_info()
		return_to_cache(value)
		return (None,None)
	except Exception :
		(type,value,traceback) = sys.exc_info()
		return_to_cache(value)
		return (None,None)

	# Store the expiration date of the newly accessed data
	expiration_date = content.expiration_date

	if content.content_type == MediaTypes.turtle :
		try :
			retval = Graph()
			retval.parse(content.data, format="n3")
		except :
			(type,value,traceback) = sys.exc_info()
			options.add_warning(err_unparsable_Turtle_vocab % (uri,value))
	elif content.content_type == MediaTypes.rdfxml :
		try :
			retval = Graph()
			retval.parse(content.data)
		except :
			(type,value,traceback) = sys.exc_info()
			options.add_warning(err_unparsable_Turtle_vocab % (uri,value))
	elif content.content_type == MediaTypes.nt :
		try :
			retval = Graph()
			retval.parse(content.data, format="nt")
		except :
			(type,value,traceback) = sys.exc_info()
			options.add_warning(err_unparsable_ntriples_vocab % (uri,value))
	elif content.content_type in [MediaTypes.xhtml, MediaTypes.html, MediaTypes.xml] or xml_application_media_type.match(content.content_type) != None :
		try :
			from pyRdfa import pyRdfa
			from pyRdfa.options	import Options
			options = Options()
			retval = pyRdfa(options).graph_from_source(content.data)
		except :
			(type,value,traceback) = sys.exc_info()
			options.add_warning(err_unparsable_rdfa_vocab % (uri,value))
	else :
		options.add_warning(err_unrecognised_vocab_type % (uri, content.content_type))

	return (retval, expiration_date)

############################################################################################
type 				= ns_rdf["type"]
Property 			= ns_rdf["Property"]
Class 				= ns_rdfs["Class"]
subClassOf			= ns_rdfs["subClassOf"]
subPropertyOf		= ns_rdfs["subPropertyOf"]
equivalentProperty	= ns_owl["equivalentProperty"]
equivalentClass 	= ns_owl["equivalentClass"]

[docs]class MiniOWL :
	"""
	Class implementing the simple OWL RL Reasoning required by RDFa in managing vocabulary files. This is done via
	a forward chaining process (in the L{closure} method) using a few simple rules as defined by the RDF and the OWL Semantics
	specifications.

	@ivar graph: the graph that has to be expanded
	@ivar added_triples: each cycle collects the triples that are to be added to the graph eventually.
	@type added_triples: a set, to ensure the unicity of triples being added
	"""
[docs]	def __init__(self, graph, schema_semantics = False) :
		self.graph         		= graph
		self.added_triples 		= None
		self.schema_semantics 	= schema_semantics

[docs]	def closure(self) :
		"""
		   Generate the closure the graph. This is the real 'core'.

		   The processing rules store new triples via the L{separate method<store_triple>} which stores
		   them in the L{added_triples<added_triples>} array. If that array is emtpy at the end of a cycle,
		   it means that the whole process can be stopped.
		"""

		# Go cyclically through all rules until no change happens
		new_cycle = True
		cycle_num = 0
		while new_cycle :
			# yes, there was a change, let us go again
			cycle_num += 1

			# go through all rules, and collect the replies (to see whether any change has been done)
			# the new triples to be added are collected separately not to interfere with
			# the current graph yet
			self.added_triples = set()

			# Execute all the rules; these might fill up the added triples array
			for t in self.graph : self.rules(t)

			# Add the tuples to the graph (if necessary, that is). If any new triple has been generated, a new cycle
			# will be necessary...
			new_cycle = len(self.added_triples) > 0

			for t in self.added_triples : self.graph.add(t)

[docs]	def store_triple(self, t) :
		"""
		In contrast to its name, this does not yet add anything to the graph itself, it just stores the tuple in an
		L{internal set<added_triples>}. (It is important for this to be a set: some of the rules in the various closures may
		generate the same tuples several times.) Before adding the tuple to the set, the method checks whether
		the tuple is in the final graph already (if yes, it is not added to the set).

		The set itself is emptied at the start of every processing cycle; the triples are then effectively added to the
		graph at the end of such a cycle. If the set is
		actually empty at that point, this means that the cycle has not added any new triple, and the full processing can stop.

		@param t: the triple to be added to the graph, unless it is already there
		@type t: a 3-element tuple of (s,p,o)
		"""
		(s,p,o) = t
		if t not in self.graph :
			self.added_triples.add(t)

[docs]	def rules(self, t) :
		"""
			Go through the OWL-RL entailement rules prp-spo1, prp-eqp1, prp-eqp2, cax-sco, cax-eqc1, and cax-eqc2 by extending the graph.
			@param t: a triple (in the form of a tuple)
		"""
		s,p,o = t
		if self.schema_semantics :
			# extra resonings on the vocabulary only to reduce the overall load by reducing the expected number of chaining cycles
			if p == subPropertyOf :
				for Z,Y,xxx in self.graph.triples((o, subPropertyOf, None)) :
					self.store_triple((s,subPropertyOf,xxx))
			elif p == equivalentProperty :
				for Z,Y,xxx in self.graph.triples((o, equivalentProperty, None)) :
					self.store_triple((s,equivalentProperty,xxx))
				for xxx,Y,Z in self.graph.triples((None, equivalentProperty, s)) :
					self.store_triple((xxx,equivalentProperty,o))
			elif p == subClassOf :
				for Z,Y,xxx in self.graph.triples((o, subClassOf, None)) :
					self.store_triple((s,subClassOf,xxx))
			elif p == equivalentClass :
				for Z,Y,xxx in self.graph.triples((o, equivalentClass, None)) :
					self.store_triple((s,equivalentClass,xxx))
				for xxx,Y,Z in self.graph.triples((None, equivalentClass, s)) :
					self.store_triple((xxx,equivalentClass,o))
		else :
			if p == subPropertyOf :
				# prp-spo1
				for zzz,Z,www in self.graph.triples((None, s, None)) :
					self.store_triple((zzz, o, www))
			elif p == equivalentProperty :
				# prp-eqp1
				for zzz,Z,www in self.graph.triples((None, s, None)) :
					self.store_triple((zzz, o, www))
				# prp-eqp2
				for zzz,Z,www in self.graph.triples((None, o, None)) :
					self.store_triple((zzz, s, www))
			elif p == subClassOf :
				# cax-sco
				for vvv,Y,Z in self.graph.triples((None, type, s)) :
					self.store_triple((vvv, type, o))
			elif p == equivalentClass :
				# cax-eqc1
				for vvv,Y,Z in self.graph.triples((None, type, s)) :
					self.store_triple((vvv, type, o))
				# cax-eqc2
				for vvv,Y,Z in self.graph.triples((None, type, o)) :
					self.store_triple((vvv, type, s))

########################################################################################################

[docs]def process_rdfa_sem(graph, options) :
	"""
	Expand the graph through the minimal RDFS and OWL rules defined for RDFa.

	The expansion is done in several steps:
	 1. the vocabularies are retrieved from the incoming graph (there are RDFa triples generated for that)
	 2. all vocabularies are merged into a separate vocabulary graph
	 3. the RDFS/OWL expansion is done on the vocabulary graph, to take care of all the subproperty, subclass, etc, chains
	 4. the (expanded) vocabulary graph content is added to the incoming graph
	 5. the incoming graph is expanded
	 6. the triples appearing in the vocabulary graph are removed from the incoming graph, to avoid unnecessary extra triples from the data

	@param graph: an RDFLib Graph instance, to be expanded
	@param options: options as defined for the RDFa run; used to generate warnings
	@type options: L{pyRdfa.Options}
	"""
	# 1. collect the vocab URI-s
	vocabs = set()
	from pyRdfa import RDFA_VOCAB
	for ((s,p,v)) in graph.triples((None,RDFA_VOCAB,None)) :
		vocabs.add((str(v)))

	if len(vocabs) >= 0 :
		# 2. get all the vocab graphs
		vocab_graph = Graph()
		for uri in vocabs :
			if options.vocab_cache :
				v_graph = CachedVocab(uri, options).graph
			else :
				(v_graph, exp_date) = return_graph(uri, options)
			if v_graph != None :
				for t in v_graph :
					vocab_graph.add(t)

		# 3. Get the closure of the vocab graph; this will take care of local subproperty, etc, statements
		# Strictly speaking this is not necessary, but will speed up processing, because it may save chaining cycles on the
		# real graph
		MiniOWL(vocab_graph, schema_semantics = True).closure()

		# 4. Now get the vocab graph content added to the default graph
		for t in vocab_graph :
			graph.add(t)

		# 5. get the graph expanded through RDFS
		MiniOWL(graph).closure()

		# 4. clean up the graph by removing the schema triples
		for t in vocab_graph : graph.remove(t)

	# That was it...
	return graph