| 1 | #!/usr/bin/env python |
|---|
| 2 | # |
|---|
| 3 | # Copyright 2008 Digital Bazaar, Inc. |
|---|
| 4 | # |
|---|
| 5 | # This file is part of librdfa. |
|---|
| 6 | # |
|---|
| 7 | # librdfa is Free Software, and can be licensed under any of the |
|---|
| 8 | # following three licenses: |
|---|
| 9 | # |
|---|
| 10 | # 1. GNU Lesser General Public License (LGPL) V2.1 or any |
|---|
| 11 | # newer version |
|---|
| 12 | # 2. GNU General Public License (GPL) V2 or any newer version |
|---|
| 13 | # 3. Apache License, V2.0 or any newer version |
|---|
| 14 | # |
|---|
| 15 | # You may not use this file except in compliance with at least one of |
|---|
| 16 | # the above three licenses. |
|---|
| 17 | # |
|---|
| 18 | # See LICENSE-* at the top of this software distribution for more |
|---|
| 19 | # information regarding the details of each license. |
|---|
| 20 | # |
|---|
| 21 | # Reads in an XHTML+RDFa file and outputs the triples generated by the file |
|---|
| 22 | # in N3 format. |
|---|
| 23 | import sys, os |
|---|
| 24 | sys.path += ("../python/dist",) |
|---|
| 25 | import rdfa |
|---|
| 26 | |
|---|
| 27 | ## |
|---|
| 28 | # Formats a triple as N3 and prints it to standard output. |
|---|
| 29 | # |
|---|
| 30 | # @param n3 the output buffer (not used here; the triple is printed directly). |
|---|
| 31 | # @param subject the subject of the triple. |
|---|
| 32 | # @param predicate the predicate for the triple. |
|---|
| 33 | # @param obj the object of the triple. |
|---|
| 34 | # @param object_type the type for the object in the triple. |
|---|
| 35 | # @param datatype the datatype for the object in the triple. |
|---|
| 36 | # @param language the language for the object in the triple. |
|---|
def write_triple(
        n3, subject, predicate, obj, object_type, datatype, language):
    """Print a single triple to standard output in N3 syntax.

    The subject, the predicate and the object are each printed on their
    own line; the object line carries the terminating " .".

    @param n3 the output buffer. It is accepted so that the signature
              matches the triple handlers, but it is not used: the triple
              is printed directly.
    @param subject the subject of the triple, written as <subject>.
    @param predicate the predicate for the triple, written as <predicate>.
    @param obj the object of the triple.
    @param object_type the type for the object in the triple. When it
                       equals rdfa.RDF_TYPE_IRI the object is written as
                       <obj>; otherwise it is written as a quoted literal.
    @param datatype the datatype for the object in the triple, or None.
    @param language the language for the object in the triple, or None.
    """
    print("<%s>" % (subject,))
    print(" <%s>" % (predicate,))
    if object_type == rdfa.RDF_TYPE_IRI:
        print(" <%s> . " % (obj,))
    else:
        # The literal text is inserted verbatim; no escaping is applied.
        ostr = " \"%s\"" % (obj,)
        if language is not None:
            ostr += "@%s" % (language,)
        if datatype is not None:
            # N3/Turtle introduces the datatype of a literal with exactly
            # two carets ("..."^^<datatype>); the previous three-caret
            # form is not valid N3.
            ostr += "^^<%s>" % (datatype,)
        print(ostr + " .")
|---|
| 50 | |
|---|
| 51 | ## |
|---|
| 52 | # Called whenever a triple is generated for the default graph by the |
|---|
| 53 | # underlying implementation. |
|---|
| 54 | # |
|---|
| 55 | # @param n3 the output buffer |
|---|
| 56 | # @param subject the subject of the triple. |
|---|
| 57 | # @param predicate the predicate for the triple. |
|---|
| 58 | # @param obj the object of the triple. |
|---|
| 59 | # @param object_type the type for the object in the triple. |
|---|
| 60 | # @param datatype the datatype for the object in the triple. |
|---|
| 61 | # @param language the language for the object in the triple. |
|---|
def default_triple(
        n3, subject, predicate, obj, object_type, datatype, language):
    """Handle a triple generated for the default graph.

    The triple is handed to write_triple() unchanged.

    @param n3 the output buffer
    @param subject the subject of the triple.
    @param predicate the predicate for the triple.
    @param obj the object of the triple.
    @param object_type the type for the object in the triple.
    @param datatype the datatype for the object in the triple.
    @param language the language for the object in the triple.
    """
    write_triple(n3, subject, predicate, obj, object_type, datatype, language)
|---|
| 67 | |
|---|
| 68 | ## |
|---|
| 69 | # Called whenever a triple is generated for the processor graph by the |
|---|
| 70 | # underlying implementation. |
|---|
| 71 | # |
|---|
| 72 | # @param n3 the output buffer |
|---|
| 73 | # @param subject the subject of the triple. |
|---|
| 74 | # @param predicate the predicate for the triple. |
|---|
| 75 | # @param obj the object of the triple. |
|---|
| 76 | # @param object_type the type for the object in the triple. |
|---|
| 77 | # @param datatype the datatype for the object in the triple. |
|---|
| 78 | # @param language the language for the object in the triple. |
|---|
def processor_triple(
        n3, subject, predicate, obj, object_type, datatype, language):
    """Handle a triple generated for the processor graph.

    A triple whose object type is rdfa.RDF_TYPE_NAMESPACE_PREFIX is printed
    on one line as "subject predicate: <obj> ."; every other triple is
    handed to write_triple().

    @param n3 the output buffer
    @param subject the subject of the triple.
    @param predicate the predicate for the triple.
    @param obj the object of the triple.
    @param object_type the type for the object in the triple.
    @param datatype the datatype for the object in the triple.
    @param language the language for the object in the triple.
    """
    is_prefix_mapping = (object_type == rdfa.RDF_TYPE_NAMESPACE_PREFIX)
    if is_prefix_mapping:
        print("%s %s: <%s> ." % (subject, predicate, obj))
        return

    write_triple(n3, subject, predicate, obj, object_type, datatype, language)
|---|
| 87 | |
|---|
| 88 | ## |
|---|
| 89 | # Called whenever the processing buffer for the C-side needs to be re-filled. |
|---|
| 90 | # |
|---|
| 91 | # @param dataFile the file-like object to use when reading in the data stream. |
|---|
| 92 | # @param bufferSize the size of the buffer to return. Returning anything less |
|---|
| 93 | # than bufferSize will halt execution after the returned |
|---|
| 94 | # buffer has been processed. |
|---|
def handle_buffer(dataFile, bufferSize):
    """Supply the next chunk of input when the C-side buffer needs a refill.

    @param dataFile the file-like object to read the data stream from.
    @param bufferSize the size of the buffer to return. Returning anything
                      less than bufferSize will halt execution after the
                      returned buffer has been processed.
    @return whatever dataFile.read(bufferSize) yields.
    """
    chunk = dataFile.read(bufferSize)
    return chunk
|---|
| 97 | |
|---|
| 98 | ## |
|---|
| 99 | # The main entry point for the script. |
|---|
| 100 | # |
|---|
| 101 | # @param argv the argument list passed to the program. |
|---|
| 102 | # @param stdout the standard output stream assigned to the program. |
|---|
| 103 | # @param environ the execution environment for the program. |
|---|
def main(argv, stdout, environ):
    """Parse the file named by argv[1] and print its triples.

    Progress messages and the generated triples are all printed to
    standard output.

    @param argv the argument list passed to the program; argv[1] is the
                path of the file to parse.
    @param stdout the standard output stream assigned to the program
                  (not used; output goes through print).
    @param environ the execution environment for the program (not used).
    @return 1 if no input file was given, 0 once parsing has completed.
    """
    # Without this check a missing argument surfaces as a bare IndexError.
    if len(argv) < 2:
        program = "rdfa2n3"
        if argv:
            program = argv[0]
        sys.stderr.write("usage: %s <input-file>\n" % (program,))
        return 1

    print("creating parser")
    # NOTE(review): this URL is fixed regardless of the input file;
    # presumably it serves as the document's base URI -- confirm against
    # the rdfa module.
    parser = rdfa.RdfaParser("http://www.w3.org/2006/07/SWD/RDFa/testsuite/xhtml1-testcases/0001.xhtml")

    print("opening %s" % (argv[1],))
    data_file = open(argv[1], "r")
    try:
        n3 = ""

        print("created parser")
        parser.setDefaultGraphTripleHandler(default_triple, n3)
        print("set default graph triple handler")
        parser.setProcessorGraphTripleHandler(processor_triple, n3)
        print("set processor graph triple handler")
        parser.setBufferHandler(handle_buffer, data_file)
        print("set buffer handler")

        print("parsing...")
        parser.parse()
        print("completed parsing")
    finally:
        # Close the input even when a handler or the parser raises.
        data_file.close()

    return 0
|---|
| 124 | |
|---|
| 125 | ## |
|---|
| 126 | # Run the rdfa2n3 python application. |
|---|
if __name__ == "__main__":
    # Hand main()'s return value to sys.exit() so that a non-zero result
    # becomes the process exit status; a None result still exits with 0.
    sys.exit(main(sys.argv, sys.stdout, os.environ))
|---|