Source code for rdflib.plugins.sparql.results.tsvresults


"""
This implements the Tab Separated SPARQL Result Format

It is implemented with pyparsing, reusing the elements from the SPARQL Parser
"""

import codecs

from pyparsing import (
    Optional, ZeroOrMore, Literal, ParserElement, ParseException, Suppress,
    FollowedBy, LineEnd)

from rdflib.query import Result, ResultParser

from rdflib.plugins.sparql.parser import (
    Var, STRING_LITERAL1, STRING_LITERAL2, IRIREF, BLANK_NODE_LABEL,
    NumericLiteral, BooleanLiteral, LANGTAG)
from rdflib.plugins.sparql.parserutils import Comp, Param, CompValue

from rdflib import Literal as RDFLiteral

from six import binary_type

ParserElement.setDefaultWhitespaceChars(" \n")


String = STRING_LITERAL1 | STRING_LITERAL2

RDFLITERAL = Comp('literal', Param('string', String) + Optional(
    Param('lang', LANGTAG.leaveWhitespace()
          ) | Literal('^^').leaveWhitespace(
    ) + Param('datatype', IRIREF).leaveWhitespace()))

NONE_VALUE = object()

EMPTY = FollowedBy(LineEnd()) | FollowedBy("\t")
EMPTY.setParseAction(lambda x: NONE_VALUE)

TERM = RDFLITERAL | IRIREF | BLANK_NODE_LABEL | NumericLiteral | BooleanLiteral

ROW = (EMPTY | TERM) + ZeroOrMore(Suppress("\t") + (EMPTY | TERM))
ROW.parseWithTabs()

HEADER = Var + ZeroOrMore(Suppress("\t") + Var)
HEADER.parseWithTabs()


[docs]class TSVResultParser(ResultParser):
[docs] def parse(self, source, content_type=None): if isinstance(source.read(0), binary_type): # if reading from source returns bytes do utf-8 decoding source = codecs.getreader('utf-8')(source) try: r = Result('SELECT') header = source.readline() r.vars = list(HEADER.parseString(header.strip(), parseAll=True)) r.bindings = [] while True: line = source.readline() if not line: break line = line.strip('\n') if line == "": continue row = ROW.parseString(line, parseAll=True) r.bindings.append( dict(zip(r.vars, (self.convertTerm(x) for x in row)))) return r except ParseException as err: print(err.line) print(" " * (err.column - 1) + "^") print(err)
[docs] def convertTerm(self, t): if t is NONE_VALUE: return None if isinstance(t, CompValue): if t.name == 'literal': return RDFLiteral(t.string, lang=t.lang, datatype=t.datatype) else: raise Exception("I dont know how to handle this: %s" % (t,)) else: return t
if __name__ == '__main__': import sys r = Result.parse(file(sys.argv[1]), format='tsv') print(r.vars) print(r.bindings) # print r.serialize(format='json')