Source code for rdflib.plugins.parsers.pyRdfa.transform.DublinCore

# -*- coding: utf-8 -*-
"""
Transformer: handles the Dublin Core recommendation for XHTML for adding DC values. What this means is that:

 - DC namespaces are defined via C{<link rel="schema.XX" href="...."/>}
 - The 'XX.term' is used much like QNames in C{<link>} and C{<meta>} elements. For the latter, the namespaced names are added to a C{@property} attribute.

This transformer adds "real" namespaces and changes the DC references in link and meta elements to abide by the
RDFa namespace syntax.

@summary: Dublin Core transformer
@requires: U{RDFLib package<http://rdflib.net>}
@organization: U{World Wide Web Consortium<http://www.w3.org>}
@author: U{Ivan Herman<http://www.w3.org/People/Ivan/>}
@license: This software is available for use under the
U{W3C® SOFTWARE NOTICE AND LICENSE<http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231>}
@contact: Ivan Herman, ivan@w3.org
"""

"""
@version: $Id: DublinCore.py,v 1.4 2012-01-18 14:16:44 ivan Exp $
$Date: 2012-01-18 14:16:44 $
"""

def _dc_names_to_curies(value, dcprefixes):
    """
    Rewrite the Dublin Core style 'XX.term' names of an attribute value into the 'XX:term' form.

    A name is rewritten only if it contains a '.', the part before the first '.' is a key of
    C{dcprefixes} and the part after it is not empty; every other name is kept unchanged.

    @param value: whitespace separated list of names (the value of a C{@rel} or C{@name} attribute)
    @param dcprefixes: dictionary whose keys are the prefixes declared via C{<link rel="schema.XX" href="..."/>}
    @return: the (possibly rewritten) names, separated by single spaces
    """
    names = []
    for name in value.split():
        # only the first '.' separates the prefix from the local name
        key, dot, lname = name.partition(".")
        if dot and lname and key in dcprefixes:
            # yep, this is one of those...
            names.append(key + ":" + lname)
        else:
            names.append(name)
    return " ".join(names)


def DC_transform(html, options, state):
    """
    Turn the Dublin Core conventions used in C{<link>} and C{<meta>} elements into RDFa namespace syntax.

    For every C{<link rel="schema.XX" href="URI"/>} an C{xmlns:XX="URI"} attribute is set on the head element.
    Then the C{@rel} attribute of every C{<link>} element having one is rewritten so that names of the form
    'XX.term', with 'XX' one of the declared prefixes, become 'XX:term'. Finally, every C{<meta>} element
    having a C{@name} attribute gets a C{@property} attribute holding the value of C{@name} rewritten the
    same way (this is done even if no name had to be changed, and replaces an already present C{@property}).

    Nothing is done if the host language is not one of XHTML, HTML5 or XHTML5, or if there is no head element.

    @param html: a DOM node for the top level html element
    @param options: invocation options
    @type options: L{Options<pyRdfa.options>}
    @param state: top level execution state (not used by this transformer)
    @type state: L{State<pyRdfa.state>}
    """
    from ..host import HostLanguage
    if options.host_language not in (HostLanguage.xhtml, HostLanguage.html5, HostLanguage.xhtml5):
        return

    # the head element is necessary; to be sure, the namespaces are set
    # on that level only
    try:
        head = html.getElementsByTagName("head")[0]
    except IndexError:
        # no head....
        return

    # the same elements are needed both to collect the prefixes and to rewrite the @rel values
    links = html.getElementsByTagName("link")

    # At first, the DC namespaces must be found
    dcprefixes = {}
    for link in links:
        if not link.hasAttribute("rel"):
            continue
        rel = link.getAttribute("rel")
        uri = link.getAttribute("href")
        if uri is None or rel is None or not rel.startswith("schema."):
            continue
        # bingo... the prefix is what stands between the first and the second '.'
        localname = rel.split(".")[1]
        if not localname:
            # "schema." without a prefix: there is nothing that could be declared
            continue
        try:
            head.setAttributeNS("", "xmlns:" + localname, uri)
        except Exception:
            # declaring the prefix is best effort only: if the DOM refuses the
            # attribute, the prefix is simply not registered
            continue
        dcprefixes[localname] = uri

    # get the link elements now to find the dc elements
    for link in links:
        if link.hasAttribute("rel"):
            link.setAttribute("rel", _dc_names_to_curies(link.getAttribute("rel"), dcprefixes))

    # do almost the same with the meta elements; the result goes to @property, @name is left as it is
    for meta in html.getElementsByTagName("meta"):
        if meta.hasAttribute("name"):
            meta.setAttribute("property", _dc_names_to_curies(meta.getAttribute("name"), dcprefixes))