Source code for rdflib.plugins.parsers.pyRdfa.transform.DublinCore

# -*- coding: utf-8 -*-
"""
Transformer: handles the Dublin Core recommendation for XHTML for adding DC values. What this means is that:

 - DC namespaces are defined via C{<link rel="schema.XX" href="...."/>}
 - The 'XX.term' is used much like QNames in C{<link>} and C{<meta>} elements. For the latter, the namespaced names are added to a C{@property} attribute.

This transformer adds "real" namespaces and changes the DC references in link and meta elements to abide by the
RDFa namespace syntax.

@summary: Dublin Core transformer
@requires: U{RDFLib package<http://rdflib.net>}
@organization: U{World Wide Web Consortium<http://www.w3.org>}
@author: U{Ivan Herman<http://www.w3.org/People/Ivan/>}
@license: This software is available for use under the
U{W3C® SOFTWARE NOTICE AND LICENSE<http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231>}
@contact: Ivan Herman, ivan@w3.org
"""

"""
@version: $Id: DublinCore.py,v 1.4 2012-01-18 14:16:44 ivan Exp $
$Date: 2012-01-18 14:16:44 $
"""

def _dc_to_curies(value, dcprefixes):
	"""
	Rewrite the Dublin Core style C{prefix.term} tokens of an attribute value as C{prefix:term}.

	A token is rewritten only if the part before its first dot is a key of C{dcprefixes} and the
	part after that dot is not empty; every other token is kept unchanged.

	@param value: whitespace separated list of tokens (the value of a C{@rel} or C{@name} attribute)
	@param dcprefixes: prefixes declared via C{<link rel="schema.XX" href="..."/>}, mapped to their URIs
	@type dcprefixes: dictionary
	@return: the resulting tokens, joined by single spaces
	"""
	converted = []
	for token in value.split():
		prefix, _, lname = token.partition(".")
		# lname is empty both when there is no dot at all and when the token ends with the dot
		if lname and prefix in dcprefixes:
			converted.append(prefix + ":" + lname)
		else:
			converted.append(token)
	return " ".join(converted)


def DC_transform(html, options, state):
	"""
	Turn the Dublin Core conventions for (X)HTML into RDFa compatible attributes. The DOM tree is
	modified in place:

	 - each C{<link rel="schema.XX" href="URI"/>} adds an C{xmlns:XX="URI"} declaration to the head element
	 - the C{@rel} value of each link element that has one is rewritten, C{XX.term} tokens becoming C{XX:term}
	 - each meta element with a C{@name} gets a C{@property} holding the same, rewritten, tokens

	Nothing is done if the host language is not one of XHTML, HTML5 or XHTML5, or if there is no head element.

	@param html: a DOM node for the top level html element
	@param options: invocation options
	@type options: L{Options<pyRdfa.options>}
	@param state: top level execution state (not used by this transformer)
	@type state: L{State<pyRdfa.state>}
	"""
	from xml.dom import DOMException
	from ..host import HostLanguage
	if options.host_language not in (HostLanguage.xhtml, HostLanguage.html5, HostLanguage.xhtml5):
		return

	# the head element is necessary; to be sure, the namespaces are set
	# on that level only
	try:
		head = html.getElementsByTagName("head")[0]
	except IndexError:
		# no head....
		return

	# At first, the DC namespaces must be found
	dcprefixes = {}
	for link in html.getElementsByTagName("link"):
		if not link.hasAttribute("rel"):
			continue
		rel = link.getAttribute("rel")
		uri = link.getAttribute("href")
		# DOM getAttribute returns "" (not None) for a missing attribute, so test for emptiness:
		# a schema declaration without a URI cannot be turned into a namespace declaration
		if not (uri and rel and rel.startswith("schema.")):
			continue
		# rel starts with "schema.", so there is always an item after the first dot
		localname = rel.split(".")[1]
		if not localname:
			# "schema." alone does not name any prefix
			continue
		try:
			head.setAttributeNS("", "xmlns:" + localname, uri)
		except DOMException:
			# the DOM implementation refused the attribute name; just ignore this declaration
			continue
		dcprefixes[localname] = uri

	# get the link elements now to find the dc elements
	for link in html.getElementsByTagName("link"):
		if link.hasAttribute("rel"):
			link.setAttribute("rel", _dc_to_curies(link.getAttribute("rel"), dcprefixes))

	# do almost the same with the meta elements...
	for meta in html.getElementsByTagName("meta"):
		if meta.hasAttribute("name"):
			# NOTE: @property is set for every meta with a @name, replacing any value it had
			meta.setAttribute("property", _dc_to_curies(meta.getAttribute("name"), dcprefixes))