Source code for rdflib.plugins.parsers.pyRdfa.options

# -*- coding: utf-8 -*-
"""
L{Options} class: collect the possible options that govern the parsing possibilities. The module also includes the L{ProcessorGraph} class that handles the processor graph, per RDFa 1.1 (i.e., the graph containing errors and warnings).

@summary: RDFa parser (distiller)
@requires: U{RDFLib<http://rdflib.net>}
@organization: U{World Wide Web Consortium<http://www.w3.org>}
@author: U{Ivan Herman<http://www.w3.org/People/Ivan/>}
@license: This software is available for use under the
U{W3C SOFTWARE NOTICE AND LICENSE<http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231>}
"""

"""
$Id: options.py,v 1.20 2013-10-16 11:48:54 ivan Exp $ $Date: 2013-10-16 11:48:54 $
"""

import sys, datetime

import rdflib
from rdflib	import URIRef
from rdflib	import Literal
from rdflib	import BNode
from rdflib	import Namespace
# Pick the import locations that match the installed rdflib release.
# The major version is compared as an integer: comparing the version strings
# lexicographically would rank e.g. "10.0.0" below "3.0.0" and wrongly select
# the legacy branch.
if int(rdflib.__version__.split(".")[0]) >= 3 :
    from rdflib import Graph
    from rdflib import RDF  as ns_rdf
    from rdflib import RDFS as ns_rdfs
else :
    # Module layout of the rdflib releases that precede 3.0.0
    from rdflib.Graph import Graph
    from rdflib.RDFS import RDFSNS as ns_rdfs
    from rdflib.RDF import RDFNS  as ns_rdf

from .host 	import HostLanguage, MediaTypes, content_to_host_language, predefined_1_0_rel, require_embedded_rdf
from .		import ns_xsd, ns_distill, ns_rdfa
from . 		import RDFA_Error, RDFA_Warning, RDFA_Info
from .transform.lite import lite_prune

# Dublin Core terms namespace; supplies the description and date predicates
# of the messages stored in the processor graph (bound to the "dcterms" prefix).
ns_dc = Namespace("http://purl.org/dc/terms/")
# HTTP vocabulary namespace; supplies the Request/Response context terms
# of the messages stored in the processor graph (bound to the "ht" prefix).
ns_ht = Namespace("http://www.w3.org/2006/http#")

class ProcessorGraph:
    """Wrapper around the 'processor graph', i.e., the (RDF) Graph containing the warnings,
    error messages, and informational messages.

    @ivar graph: the graph that accumulates the message triples
    @type graph: Graph
    """

    def __init__(self):
        """Create the wrapper around a new, empty graph."""
        self.graph = Graph()

    def add_triples(self, msg, top_class, info_class, context, node):
        """
        Add an error structure to the processor graph: a bnode with a number of predicates.
        The structure follows
        U{the processor graph vocabulary<http://www.w3.org/2010/02/rdfa/wiki/Processor_Graph_Vocabulary>}
        as described on the RDFa WG Wiki page.

        @param msg: the core error message, added as an object to a dc:description
        @param top_class: Error, Warning, or Info; an explicit rdf:type added to the bnode
        @type top_class: URIRef
        @param info_class: An additional error class, added as an rdf:type to the bnode in case it is not None
        @type info_class: URIRef
        @param context: An additional information added, if not None, as the ht:requestURI (a literal) of
        an ht:Request node that is attached to the bnode with rdfa:context as a predicate. Values that are
        neither an URIRef nor a string are ignored.
        @type context: either an URIRef or a URI String
        @param node: The node that contains the error; if it has a C{nodeName} attribute that name is
        reported, otherwise the value itself is used as the element name. May be None.
        @type node: DOM node or string
        @return: the bnode that serves as a subject for the errors. The caller may add additional information
        @rtype: BNode
        """
        # Lazy binding of relevant prefixes
        self.graph.bind("dcterms", ns_dc)
        self.graph.bind("pyrdfa", ns_distill)
        self.graph.bind("rdf", ns_rdf)
        self.graph.bind("rdfa", ns_rdfa)
        self.graph.bind("ht", ns_ht)
        self.graph.bind("xsd", ns_xsd)

        # 'basestring' is only defined on Python 2; when the name is missing, test against str.
        # Only NameError is caught so that unrelated exceptions are not swallowed.
        try:
            is_context_string = isinstance(context, basestring)
        except NameError:
            is_context_string = isinstance(context, str)

        bnode = BNode()

        if node is not None:
            # Prefer the element name of a DOM-like node; fall back on the value itself
            element_name = getattr(node, "nodeName", node)
            full_msg = "[In element '%s'] %s" % (element_name, msg)
        else:
            full_msg = msg

        self.graph.add((bnode, ns_rdf["type"], top_class))
        if info_class:
            self.graph.add((bnode, ns_rdf["type"], info_class))
        self.graph.add((bnode, ns_dc["description"], Literal(full_msg)))
        timestamp = datetime.datetime.utcnow().isoformat()
        self.graph.add((bnode, ns_dc["date"], Literal(timestamp, datatype=ns_xsd["dateTime"])))

        if context and (isinstance(context, URIRef) or is_context_string):
            htbnode = BNode()
            self.graph.add((bnode, ns_rdfa["context"], htbnode))
            self.graph.add((htbnode, ns_rdf["type"], ns_ht["Request"]))
            self.graph.add((htbnode, ns_ht["requestURI"], Literal("%s" % context)))
        return bnode

    def add_http_context(self, subj, http_code):
        """
        Add an additional HTTP context to a message with subject in C{subj}, using the
        U{<http://www.w3.org/2006/http#>} vocabulary. Typically used to extend an error structure,
        as created by L{add_triples}.

        @param subj: an RDFLib resource, typically a blank node
        @param http_code: HTTP status code; appended to the HTTP vocabulary namespace URI to form
        the object of ht:responseCode
        """
        bnode = BNode()
        self.graph.add((subj, ns_rdfa["context"], bnode))
        self.graph.add((bnode, ns_rdf["type"], ns_ht["Response"]))
        self.graph.add((bnode, ns_ht["responseCode"], URIRef("http://www.w3.org/2006/http#%s" % http_code)))
class Options:
    """Settable options. An instance of this class is stored in
    the L{execution context<ExecutionContext>} of the parser.

    @ivar space_preserve: whether plain literals should preserve spaces at output or not
    @type space_preserve: Boolean
    @ivar output_default_graph: whether the 'default' graph should be returned to the user
    @type output_default_graph: Boolean
    @ivar output_processor_graph: whether the 'processor' graph should be returned to the user
    @type output_processor_graph: Boolean
    @ivar processor_graph: the 'processor' Graph
    @type processor_graph: L{ProcessorGraph}
    @ivar transformers: extra transformers
    @type transformers: list
    @ivar vocab_cache_report: whether the details of vocabulary file caching process should be reported as information (mainly for debug)
    @type vocab_cache_report: Boolean
    @ivar refresh_vocab_cache: whether the caching checks of vocabs should be by-passed, ie, if caches should be re-generated regardless of the stored date (important for vocab development)
    @type refresh_vocab_cache: Boolean
    @ivar embedded_rdf: whether embedded RDF (ie, turtle in an HTML script element or an RDF/XML content in SVG) should be extracted and added to the final graph. This is a non-standard option...
    @type embedded_rdf: Boolean
    @ivar vocab_expansion: whether the @vocab elements should be expanded and a mini-RDFS processing should be done on the merged graph
    @type vocab_expansion: Boolean
    @ivar vocab_cache: whether the system should use the vocabulary caching mechanism when expanding via the mini-RDFS, or should just fetch the graphs every time
    @type vocab_cache: Boolean
    @ivar host_language: the host language for the RDFa attributes. The constructor sets it to HostLanguage.rdfa_core; L{set_host_language} may change it according to the content type.
    @type host_language: integer (logically: an enumeration)
    @ivar content_type: the content type of the host file. Note: no method of this class assigns this attribute.
    @type content_type: string (logically: an enumeration)
    @ivar add_informational_messages: whether informational messages should also be added to the processor graph, or only errors and warnings
    @ivar experimental_features: whether experimental features should be activated; that is a developer's option...
    @ivar check_lite: whether RDFa Lite should be checked, to generate warnings.
    """

    def __init__(self,
                 output_default_graph=True,
                 output_processor_graph=False,
                 space_preserve=True,
                 transformers=None,
                 embedded_rdf=True,
                 vocab_expansion=False,
                 vocab_cache=True,
                 vocab_cache_report=False,
                 refresh_vocab_cache=False,
                 add_informational_messages=False,
                 check_lite=False,
                 experimental_features=False):
        """
        Store the option values and create a fresh, empty processor graph.

        @keyword transformers: extra transformers; None (the default) stands for an empty list.
        The values are copied into a list owned by this instance, so that adding the RDFa Lite
        checker (see C{check_lite}) modifies neither the caller's sequence nor a default shared
        among instances.
        @type transformers: list
        @keyword check_lite: if True, C{lite_prune} is appended to the transformers of this instance

        The other keywords are stored unchanged in the instance variables of the same name.
        """
        self.space_preserve = space_preserve
        # Per-instance copy: a shared (default or caller-owned) list must not be mutated below
        self.transformers = list(transformers) if transformers is not None else []
        self.processor_graph = ProcessorGraph()
        self.output_default_graph = output_default_graph
        self.output_processor_graph = output_processor_graph
        self.host_language = HostLanguage.rdfa_core
        self.vocab_cache_report = vocab_cache_report
        self.refresh_vocab_cache = refresh_vocab_cache
        self.embedded_rdf = embedded_rdf
        self.vocab_expansion = vocab_expansion
        self.vocab_cache = vocab_cache
        self.add_informational_messages = add_informational_messages
        self.check_lite = check_lite
        if check_lite:
            self.transformers.append(lite_prune)
        self.experimental_features = experimental_features

    def set_host_language(self, content_type):
        """
        Set the host language for processing, based on the recognized types. If this is not a recognized content type,
        it falls back to RDFa core (i.e., XML). Extraction of embedded RDF is switched on if the resulting host
        language is among those listed in C{require_embedded_rdf}.

        @param content_type: content type
        @type content_type: string
        """
        if content_type in content_to_host_language:
            self.host_language = content_to_host_language[content_type]
        else:
            self.host_language = HostLanguage.rdfa_core

        if self.host_language in require_embedded_rdf:
            self.embedded_rdf = True

    def __str__(self):
        """Return a human readable listing of a subset of the current option values."""
        retval = """Current options: preserve space : %s output processor graph : %s output default graph : %s host language : %s accept embedded RDF : %s check rdfa lite : %s cache vocabulary graphs : %s """
        return retval % (self.space_preserve,
                         self.output_processor_graph,
                         self.output_default_graph,
                         self.host_language,
                         self.embedded_rdf,
                         self.check_lite,
                         self.vocab_cache)

    def reset_processor_graph(self):
        """Empty the processor graph. This is necessary if the same options is reused
        for several RDFa sources, and new error messages should be generated.
        """
        # The all-None pattern matches (and therefore removes) every triple
        self.processor_graph.graph.remove((None, None, None))

    def add_warning(self, txt, warning_type=None, context=None, node=None, buggy_value=None):
        """Add a warning to the processor graph.

        @param txt: the warning text.
        @keyword warning_type: Warning Class
        @type warning_type: URIRef
        @keyword context: possible context to be added to the processor graph
        @type context: URIRef or String
        @keyword node: the node (or element name) the warning refers to, if any
        @keyword buggy_value: a special case when a 'term' is not recognized; no warning is generated for that case if the value is part of the 'usual' XHTML terms, because almost all RDFa file contains some of those and that would pollute the output
        @type buggy_value: String
        @return: the subject of the generated warning structure, or None if the warning was suppressed
        """
        if warning_type == ns_rdfa["UnresolvedTerm"] and buggy_value in predefined_1_0_rel:
            return None
        return self.processor_graph.add_triples(txt, RDFA_Warning, warning_type, context, node)

    def add_info(self, txt, info_type=None, context=None, node=None, buggy_value=None):
        """Add an informational comment to the processor graph. Nothing is added
        unless C{add_informational_messages} is set.

        @param txt: the information text.
        @keyword info_type: Info Class
        @type info_type: URIRef
        @keyword context: possible context to be added to the processor graph
        @type context: URIRef or String
        @keyword node: the node (or element name) the information refers to, if any
        @keyword buggy_value: accepted so that the signature matches L{add_warning}; not used by this method
        @type buggy_value: String
        @return: the subject of the generated information structure, or None if informational messages are switched off
        """
        if not self.add_informational_messages:
            return None
        return self.processor_graph.add_triples(txt, RDFA_Info, info_type, context, node)

    def add_error(self, txt, err_type=None, context=None, node=None, buggy_value=None):
        """Add an error to the processor graph.

        @param txt: the error text.
        @keyword err_type: Error Class
        @type err_type: URIRef
        @keyword context: possible context to be added to the processor graph
        @type context: URIRef or String
        @keyword node: the node (or element name) the error refers to, if any
        @keyword buggy_value: accepted so that the signature matches L{add_warning}; not used by this method
        @type buggy_value: String
        @return: the subject of the generated error structure
        """
        return self.processor_graph.add_triples(txt, RDFA_Error, err_type, context, node)