Source code for rdflib.extras.external_graph_libs

#!/usr/bin/env python2.7
# encoding: utf-8
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

"""Convert (to and) from rdflib graphs to other well known graph libraries.

Currently the following libraries are supported:
- networkx: MultiDiGraph, DiGraph, Graph
- graph_tool: Graph

Doctests in this file are all skipped, as we can't run them conditionally if
networkx or graph_tool are available and they would err otherwise.
see ../../test/test_extras_external_graph_libs.py for conditional tests
"""

import logging
logger = logging.getLogger(__name__)


def _identity(x): return x


def _rdflib_to_networkx_graph(
        graph,
        nxgraph,
        calc_weights,
        edge_attrs,
        transform_s=_identity, transform_o=_identity):
    """Helper method for multidigraph, digraph and graph.

    Modifies nxgraph in-place!

    Arguments:
        graph: an rdflib.Graph.
        nxgraph: a networkx.Graph/DiGraph/MultiDigraph.
        calc_weights: If True adds a 'weight' attribute to each edge according
            to the count of s,p,o triples between s and o, which is meaningful
            for Graph/DiGraph.
        edge_attrs: Callable to construct edge data from s, p, o.
           'triples' attribute is handled specially to be merged.
           'weight' should not be generated if calc_weights==True.
           (see invokers below!)
        transform_s: Callable to transform node generated from s.
        transform_o: Callable to transform node generated from o.
    """
    assert callable(edge_attrs)
    assert callable(transform_s)
    assert callable(transform_o)
    import networkx as nx
    for s, p, o in graph:
        ts, to = transform_s(s), transform_o(o)  # apply possible transformations
        data = nxgraph.get_edge_data(ts, to)
        if data is None or isinstance(nxgraph, nx.MultiDiGraph):
            # no edge yet, set defaults
            data = edge_attrs(s, p, o)
            if calc_weights:
                data['weight'] = 1
            nxgraph.add_edge(ts, to, **data)
        else:
            # already have an edge, just update attributes
            if calc_weights:
                data['weight'] += 1
            if 'triples' in data:
                d = edge_attrs(s, p, o)
                data['triples'].extend(d['triples'])


[docs]def rdflib_to_networkx_multidigraph( graph, edge_attrs=lambda s, p, o: {'key': p}, **kwds): """Converts the given graph into a networkx.MultiDiGraph. The subjects and objects are the later nodes of the MultiDiGraph. The predicates are used as edge keys (to identify multi-edges). Arguments: graph: a rdflib.Graph. edge_attrs: Callable to construct later edge_attributes. It receives 3 variables (s, p, o) and should construct a dictionary that is passed to networkx's add_edge(s, o, **attrs) function. By default this will include setting the MultiDiGraph key=p here. If you don't want to be able to re-identify the edge later on, you can set this to `lambda s, p, o: {}`. In this case MultiDiGraph's default (increasing ints) will be used. Returns: networkx.MultiDiGraph >>> from rdflib import Graph, URIRef, Literal >>> g = Graph() >>> a, b, l = URIRef('a'), URIRef('b'), Literal('l') >>> p, q = URIRef('p'), URIRef('q') >>> edges = [(a, p, b), (a, q, b), (b, p, a), (b, p, l)] >>> for t in edges: ... g.add(t) ... >>> mdg = rdflib_to_networkx_multidigraph(g) >>> len(mdg.edges()) 4 >>> mdg.has_edge(a, b) True >>> mdg.has_edge(a, b, key=p) True >>> mdg.has_edge(a, b, key=q) True >>> mdg = rdflib_to_networkx_multidigraph(g, edge_attrs=lambda s,p,o: {}) >>> mdg.has_edge(a, b, key=0) True >>> mdg.has_edge(a, b, key=1) True """ import networkx as nx mdg = nx.MultiDiGraph() _rdflib_to_networkx_graph(graph, mdg, False, edge_attrs, **kwds) return mdg
[docs]def rdflib_to_networkx_digraph( graph, calc_weights=True, edge_attrs=lambda s, p, o: {'triples': [(s, p, o)]}, **kwds): """Converts the given graph into a networkx.DiGraph. As an rdflib.Graph() can contain multiple edges between nodes, by default adds the a 'triples' attribute to the single DiGraph edge with a list of all triples between s and o. Also by default calculates the edge weight as the length of triples. Args: graph: a rdflib.Graph. calc_weights: If true calculate multi-graph edge-count as edge 'weight' edge_attrs: Callable to construct later edge_attributes. It receives 3 variables (s, p, o) and should construct a dictionary that is passed to networkx's add_edge(s, o, **attrs) function. By default this will include setting the 'triples' attribute here, which is treated specially by us to be merged. Other attributes of multi-edges will only contain the attributes of the first edge. If you don't want the 'triples' attribute for tracking, set this to `lambda s, p, o: {}`. Returns: networkx.DiGraph >>> from rdflib import Graph, URIRef, Literal >>> g = Graph() >>> a, b, l = URIRef('a'), URIRef('b'), Literal('l') >>> p, q = URIRef('p'), URIRef('q') >>> edges = [(a, p, b), (a, q, b), (b, p, a), (b, p, l)] >>> for t in edges: ... g.add(t) ... >>> dg = rdflib_to_networkx_digraph(g) >>> dg[a][b]['weight'] 2 >>> sorted(dg[a][b]['triples']) == [(a, p, b), (a, q, b)] True >>> len(dg.edges()) 3 >>> dg.size() 3 >>> dg.size(weight='weight') 4.0 >>> dg = rdflib_to_networkx_graph(g, False, edge_attrs=lambda s,p,o:{}) >>> 'weight' in dg[a][b] False >>> 'triples' in dg[a][b] False """ import networkx as nx dg = nx.DiGraph() _rdflib_to_networkx_graph(graph, dg, calc_weights, edge_attrs, **kwds) return dg
[docs]def rdflib_to_networkx_graph( graph, calc_weights=True, edge_attrs=lambda s, p, o: {'triples': [(s, p, o)]}, **kwds): """Converts the given graph into a networkx.Graph. As an rdflib.Graph() can contain multiple directed edges between nodes, by default adds the a 'triples' attribute to the single DiGraph edge with a list of triples between s and o in graph. Also by default calculates the edge weight as the len(triples). Args: graph: a rdflib.Graph. calc_weights: If true calculate multi-graph edge-count as edge 'weight' edge_attrs: Callable to construct later edge_attributes. It receives 3 variables (s, p, o) and should construct a dictionary that is passed to networkx's add_edge(s, o, **attrs) function. By default this will include setting the 'triples' attribute here, which is treated specially by us to be merged. Other attributes of multi-edges will only contain the attributes of the first edge. If you don't want the 'triples' attribute for tracking, set this to `lambda s, p, o: {}`. Returns: networkx.Graph >>> from rdflib import Graph, URIRef, Literal >>> g = Graph() >>> a, b, l = URIRef('a'), URIRef('b'), Literal('l') >>> p, q = URIRef('p'), URIRef('q') >>> edges = [(a, p, b), (a, q, b), (b, p, a), (b, p, l)] >>> for t in edges: ... g.add(t) ... >>> ug = rdflib_to_networkx_graph(g) >>> ug[a][b]['weight'] 3 >>> sorted(ug[a][b]['triples']) == [(a, p, b), (a, q, b), (b, p, a)] True >>> len(ug.edges()) 2 >>> ug.size() 2 >>> ug.size(weight='weight') 4.0 >>> ug = rdflib_to_networkx_graph(g, False, edge_attrs=lambda s,p,o:{}) >>> 'weight' in ug[a][b] False >>> 'triples' in ug[a][b] False """ import networkx as nx g = nx.Graph() _rdflib_to_networkx_graph(graph, g, calc_weights, edge_attrs, **kwds) return g
[docs]def rdflib_to_graphtool( graph, v_prop_names=[str('term')], e_prop_names=[str('term')], transform_s=lambda s, p, o: {str('term'): s}, transform_p=lambda s, p, o: {str('term'): p}, transform_o=lambda s, p, o: {str('term'): o}, ): """Converts the given graph into a graph_tool.Graph(). The subjects and objects are the later vertices of the Graph. The predicates become edges. Arguments: graph: a rdflib.Graph. v_prop_names: a list of names for the vertex properties. The default is set to ['term'] (see transform_s, transform_o below). e_prop_names: a list of names for the edge properties. transform_s: callable with s, p, o input. Should return a dictionary containing a value for each name in v_prop_names. By default is set to {'term': s} which in combination with v_prop_names = ['term'] adds s as 'term' property to the generated vertex for s. transform_p: similar to transform_s, but wrt. e_prop_names. By default returns {'term': p} which adds p as a property to the generated edge between the vertex for s and the vertex for o. transform_o: similar to transform_s. Returns: graph_tool.Graph() >>> from rdflib import Graph, URIRef, Literal >>> g = Graph() >>> a, b, l = URIRef('a'), URIRef('b'), Literal('l') >>> p, q = URIRef('p'), URIRef('q') >>> edges = [(a, p, b), (a, q, b), (b, p, a), (b, p, l)] >>> for t in edges: ... g.add(t) ... >>> mdg = rdflib_to_graphtool(g) >>> len(list(mdg.edges())) 4 >>> from graph_tool import util as gt_util >>> vpterm = mdg.vertex_properties['term'] >>> va = gt_util.find_vertex(mdg, vpterm, a)[0] >>> vb = gt_util.find_vertex(mdg, vpterm, b)[0] >>> vl = gt_util.find_vertex(mdg, vpterm, l)[0] >>> (va, vb) in [(e.source(), e.target()) for e in list(mdg.edges())] True >>> epterm = mdg.edge_properties['term'] >>> len(list(gt_util.find_edge(mdg, epterm, p))) == 3 True >>> len(list(gt_util.find_edge(mdg, epterm, q))) == 1 True >>> mdg = rdflib_to_graphtool( ... g, ... e_prop_names=[str('name')], ... transform_p=lambda s, p, o: {str('name'): unicode(p)}) >>> epterm = mdg.edge_properties['name'] >>> len(list(gt_util.find_edge(mdg, epterm, unicode(p)))) == 3 True >>> len(list(gt_util.find_edge(mdg, epterm, unicode(q)))) == 1 True """ import graph_tool as gt g = gt.Graph() vprops = [(vpn, g.new_vertex_property('object')) for vpn in v_prop_names] for vpn, vprop in vprops: g.vertex_properties[vpn] = vprop eprops = [(epn, g.new_edge_property('object')) for epn in e_prop_names] for epn, eprop in eprops: g.edge_properties[epn] = eprop node_to_vertex = {} for s, p, o in graph: sv = node_to_vertex.get(s) if sv is None: v = g.add_vertex() node_to_vertex[s] = v tmp_props = transform_s(s, p, o) for vpn, vprop in vprops: vprop[v] = tmp_props[vpn] sv = v ov = node_to_vertex.get(o) if ov is None: v = g.add_vertex() node_to_vertex[o] = v tmp_props = transform_o(s, p, o) for vpn, vprop in vprops: vprop[v] = tmp_props[vpn] ov = v e = g.add_edge(sv, ov) tmp_props = transform_p(s, p, o) for epn, eprop in eprops: eprop[e] = tmp_props[epn] return g
if __name__ == '__main__': import sys import logging.config logging.basicConfig(level=logging.DEBUG) import nose nose.run(argv=[sys.argv[0], sys.argv[0], '-v', '--without-doctest'])