| 1 | |
|---|
| 2 | |
|---|
| 3 | |
|---|
| 4 | |
|---|
| 5 | |
|---|
| 6 | |
|---|
| 7 | |
|---|
| 8 | |
|---|
| 9 | |
|---|
| 10 | |
|---|
| 11 | |
|---|
| 12 | |
|---|
| 13 | |
|---|
| 14 | |
|---|
| 15 | |
|---|
| 16 | |
|---|
| 17 | |
|---|
| 18 | |
|---|
| 19 | |
|---|
| 20 | |
|---|
| 21 | import sys, os, urllib2 |
|---|
| 22 | sys.path += ("../python/dist",) |
|---|
| 23 | import rdfa |
|---|
| 24 | from StringIO import StringIO |
|---|
| 25 | from rdflib.Graph import ConjunctiveGraph |
|---|
| 26 | |
|---|
# Kinds of input location that main() distinguishes.
URL_TYPE_HTTP = 1  # input is opened with urllib2.urlopen
URL_TYPE_FILE = 2  # input is opened as a local file
|---|
| 29 | |
|---|
| 30 | |
|---|
| 31 | |
|---|
| 32 | |
|---|
| 33 | |
|---|
| 34 | |
|---|
| 35 | |
|---|
| 36 | |
|---|
| 37 | |
|---|
| 38 | |
|---|
| 39 | |
|---|
def handleTriple(rdf, subject, predicate, obj, objectType, dataType,
                 language):
    """Record one parser callback in the ``rdf`` accumulator.

    A callback whose ``objectType`` is ``rdfa.RDF_TYPE_NAMESPACE_PREFIX`` is
    stored in ``rdf['namespaces']`` with ``predicate`` as the key and ``obj``
    as the value.  Every other callback is appended to ``rdf['triples']`` as
    the 6-tuple ``(subject, predicate, obj, objectType, dataType, language)``.
    """
    if objectType != rdfa.RDF_TYPE_NAMESPACE_PREFIX:
        entry = (subject, predicate, obj, objectType, dataType, language)
        rdf['triples'].append(entry)
        return

    rdf['namespaces'][predicate] = obj
|---|
| 48 | |
|---|
| 49 | |
|---|
| 50 | |
|---|
| 51 | |
|---|
| 52 | |
|---|
| 53 | |
|---|
| 54 | |
|---|
| 55 | |
|---|
def fillBuffer(dataFile, bufferSize):
    """Return the next chunk of input read from ``dataFile``.

    dataFile   -- an open file-like object with a ``read`` method.
    bufferSize -- the largest amount of data the caller wants back.

    At most ``bufferSize`` units are returned per call, so the caller's
    buffer is never handed more than it asked for and large inputs are not
    pulled into memory in one piece.  An empty string is returned once the
    input is exhausted.

    NOTE(review): this assumes the parser keeps invoking the handler until
    it receives an empty result -- confirm against the rdfa bindings.
    """
    # A missing or non-positive size cannot express a chunk length; fall back
    # to reading everything rather than returning "" and signalling a false
    # end of input.
    if bufferSize is None or bufferSize <= 0:
        return dataFile.read()

    return dataFile.read(bufferSize)
|---|
| 58 | |
|---|
def objectToN3(obj, objectType, dataType, language):
    """Return the N3 text for the object position of a triple.

    obj        -- the object value (literal text or IRI string).
    objectType -- one of the ``rdfa.RDF_TYPE_*`` constants.
    dataType   -- datatype IRI, used only for typed literals.
    language   -- language tag, used only for plain literals.

    Plain, typed and XML literals are emitted as a double-quoted string;
    IRIs are emitted inside angle brackets.  A plain literal with a language
    gets an ``@language`` suffix, a typed literal gets ``^^<dataType>`` and
    an XML literal gets the rdf:XMLLiteral datatype.  Any other
    ``objectType`` yields an empty string.
    """
    literalTypes = (rdfa.RDF_TYPE_PLAIN_LITERAL,
                    rdfa.RDF_TYPE_TYPED_LITERAL,
                    rdfa.RDF_TYPE_XML_LITERAL)
    rval = ""

    if objectType in literalTypes:
        # The backslash must be escaped first; otherwise the backslashes
        # introduced by the later replacements would be doubled, and a
        # backslash in the source text would be read as an escape sequence.
        escaped = obj.replace("\\", "\\\\")
        escaped = escaped.replace("\"", "\\\"")
        escaped = escaped.replace("\n", "\\n")
        # A raw carriage return is not allowed inside a one-line quoted
        # string either.
        escaped = escaped.replace("\r", "\\r")
        rval += "\"%s\"" % (escaped,)
    elif objectType == rdfa.RDF_TYPE_IRI:
        rval += "<%s>" % (obj,)

    if language and (objectType == rdfa.RDF_TYPE_PLAIN_LITERAL):
        rval += "@%s" % (language,)

    if objectType == rdfa.RDF_TYPE_TYPED_LITERAL:
        rval += "^^<%s>" % (dataType,)
    elif objectType == rdfa.RDF_TYPE_XML_LITERAL:
        rval += "^^<http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral>"

    return rval
|---|
| 79 | |
|---|
| 80 | |
|---|
| 81 | |
|---|
| 82 | |
|---|
| 83 | |
|---|
| 84 | |
|---|
| 85 | |
|---|
| 86 | |
|---|
| 87 | |
|---|
def bnodeToN3(triples, processed, allTriples):
    """Render the triples of one blank node as an N3 ``[ ... ]`` block.

    triples    -- the triples whose subject is the blank node being rendered.
    processed  -- list of subjects already rendered; the subject of the first
                  triple is appended to it once rendering finishes.
    allTriples -- every known triple, searched when an object is itself a
                  blank node that has not been rendered yet.

    Returns ``"[ ]"`` (and leaves ``processed`` untouched) when ``triples``
    is empty.

    NOTE(review): a blank node is only added to ``processed`` after its own
    triples have been rendered, so a cycle of blank nodes would recurse
    without bound -- confirm the inputs cannot contain one.
    """
    if len(triples) < 1:
        return "[ ]"

    # One entry per triple; an entry stays empty when the triple is skipped
    # so that the "; " separators come out exactly one per triple boundary.
    pieces = []
    for triple in triples:
        subject, predicate, obj, objectType, dataType, language = triple[:6]

        if obj.startswith("_:") and obj not in processed:
            nestedTriples = getTriplesBySubject(obj, allTriples)
            nested = bnodeToN3(nestedTriples, processed, allTriples)
            pieces.append("<%s> %s" % (predicate, nested))
        elif subject not in processed:
            rendered = objectToN3(obj, objectType, dataType, language)
            pieces.append("<%s> %s" % (predicate, rendered))
        else:
            pieces.append("")

    processed.append(triples[0][0])

    return "[ " + "; ".join(pieces) + " ]"
|---|
| 124 | |
|---|
| 125 | |
|---|
| 126 | |
|---|
| 127 | |
|---|
| 128 | |
|---|
| 129 | |
|---|
| 130 | |
|---|
| 131 | |
|---|
| 132 | |
|---|
| 133 | |
|---|
| 134 | |
|---|
| 135 | |
|---|
| 136 | |
|---|
def tripleToN3(triples, processed, allTriples):
    """Render ``triples`` as N3 statements, one ``<s> <p> object .`` per line.

    triples    -- the triples to render.
    processed  -- list of subjects already rendered; a triple whose object is
                  a blank node found in this list is skipped.
    allTriples -- every known triple, used to look up the triples of a blank
                  node object so it can be rendered inline.

    Returns the concatenated statements, or an empty string when nothing was
    rendered.
    """
    statements = []

    for triple in triples:
        subject, predicate, obj, objectType, dataType, language = triple[:6]
        objIsBnode = obj.startswith("_:")

        # A blank node that was already rendered must not be rendered again.
        if objIsBnode and obj in processed:
            continue

        if objIsBnode:
            bnodeTriples = getTriplesBySubject(obj, allTriples)
            rendered = bnodeToN3(bnodeTriples, processed, allTriples)
        else:
            rendered = objectToN3(obj, objectType, dataType, language)

        statements.append("<%s> <%s> %s .\n" % (subject, predicate, rendered))

    return "".join(statements)
|---|
| 162 | |
|---|
| 163 | |
|---|
| 164 | |
|---|
| 165 | |
|---|
| 166 | |
|---|
| 167 | |
|---|
| 168 | |
|---|
def getNonBnodeSubjects(triples):
    """Return the distinct subjects in ``triples`` that are not blank nodes.

    A subject counts as a blank node when it starts with ``"_:"``.  The
    result is a sorted list, so the order does not depend on hash ordering
    and matches the sorted result of getBnodeSubjects().
    """
    subjects = set(triple[0] for triple in triples
                   if not triple[0].startswith("_:"))

    return sorted(subjects)
|---|
| 178 | |
|---|
| 179 | |
|---|
| 180 | |
|---|
| 181 | |
|---|
| 182 | |
|---|
| 183 | |
|---|
| 184 | |
|---|
def getBnodeSubjects(triples):
    """Return the distinct blank-node subjects in ``triples``, sorted.

    A subject counts as a blank node when it starts with ``"_:"``.  The
    result is always a new list.
    """
    # sorted() accepts any iterable, so this does not rely on keys()
    # returning a list that can be sorted in place.
    subjects = set(triple[0] for triple in triples
                   if triple[0].startswith("_:"))

    return sorted(subjects)
|---|
| 197 | |
|---|
| 198 | |
|---|
| 199 | |
|---|
| 200 | |
|---|
| 201 | |
|---|
| 202 | |
|---|
| 203 | |
|---|
def getTriplesBySubject(subject, triples):
    """Return, in their original order, the triples whose subject is ``subject``.

    The subject is the first element of each triple.  A new list is returned;
    it is empty when nothing matches.
    """
    return [candidate for candidate in triples if candidate[0] == subject]
|---|
| 212 | |
|---|
| 213 | |
|---|
| 214 | |
|---|
| 215 | |
|---|
| 216 | |
|---|
| 217 | |
|---|
| 218 | |
|---|
def getRdfXml(rdf):
    """Serialize the collected namespaces and triples through rdflib.

    rdf -- dictionary with a ``'namespaces'`` mapping (prefix -> URI) and a
           ``'triples'`` list of 6-tuples, as filled in by handleTriple().

    The ``xh1`` and ``rdf`` prefixes are written into ``rdf['namespaces']``
    before serializing, so the caller's dictionary is modified.  The data is
    first rendered as N3 text, then parsed into a ConjunctiveGraph, and the
    result of the graph's ``serialize()`` call is returned.
    """
    namespaces = rdf['namespaces']
    namespaces['xh1'] = "http://www.w3.org/1999/xhtml/vocab#"
    namespaces['rdf'] = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"

    # Prefix declarations come first in the N3 document.
    chunks = ["@prefix %s: <%s> .\n" % (prefix, uri)
              for prefix, uri in namespaces.items()]

    triples = rdf['triples']
    processed = []
    nonBnodeSubjects = getNonBnodeSubjects(triples)
    bnodeSubjects = getBnodeSubjects(triples)

    # Named subjects go first; blank nodes they reference are rendered
    # inline and recorded in ``processed`` along the way.
    for subject in nonBnodeSubjects:
        if subject in processed:
            continue
        subjectTriples = getTriplesBySubject(subject, triples)
        chunks.append(tripleToN3(subjectTriples, processed, triples))
        processed.append(subject)

    # Whatever blank nodes were not rendered inline get their own statement.
    for subject in bnodeSubjects:
        if subject in processed:
            continue
        subjectTriples = getTriplesBySubject(subject, triples)
        chunks.append(bnodeToN3(subjectTriples, processed, triples))
        chunks.append(" .\n")

    graph = ConjunctiveGraph()
    graph.parse(StringIO("".join(chunks)), format="n3")

    return graph.serialize()
|---|
| 260 | |
|---|
| 261 | |
|---|
| 262 | |
|---|
| 263 | |
|---|
| 264 | |
|---|
| 265 | |
|---|
| 266 | |
|---|
def main(argv, stdout, environ):
    """Convert the document named by ``argv[1]`` and print the result.

    argv    -- command-line arguments; ``argv[1]`` is a file path or a URL.
    stdout  -- accepted for the caller's convenience; not used here.
    environ -- accepted for the caller's convenience; not used here.

    Prints a usage message and exits with status 1 when no input is given,
    and prints an error and exits with status 1 when a local file does not
    exist or is not readable.  Otherwise the input is parsed with
    ``rdfa.RdfaParser`` and the output of getRdfXml() is printed.
    """
    # Only a missing or empty argument is a usage error.  Requiring a minimum
    # length would turn away perfectly valid short file names.
    if len(argv) < 2 or not argv[1]:
        print("usage: %s <file>" % (argv[0],))
        print("or")
        print("  %s <URL>" % (argv[0],))
        sys.exit(1)

    source = argv[1]

    # urllib2 can open https as well as http, so both are treated as URLs
    # instead of being mistaken for local paths.
    if source.startswith(("http:", "https:")):
        urlType = URL_TYPE_HTTP
    else:
        urlType = URL_TYPE_FILE

    if urlType == URL_TYPE_FILE:
        if not os.path.exists(source):
            print("File %s, does not exist" % (source))
            sys.exit(1)
        if not os.access(source, os.R_OK):
            print("Cannot read file named %s" % (source))
            sys.exit(1)

    # Open the input and work out the base URI handed to the parser.
    if urlType == URL_TYPE_HTTP:
        dataFile = urllib2.urlopen(source)
        baseUri = source
    else:
        dataFile = open(source, "r")
        baseUri = "file://" + os.path.abspath(source)

    # Accumulator filled in by the triple handler during parsing.
    rdf = {}
    rdf['namespaces'] = {}
    rdf['triples'] = []

    try:
        parser = rdfa.RdfaParser(baseUri)
        parser.setTripleHandler(handleTriple, rdf)
        parser.setBufferHandler(fillBuffer, dataFile)
        parser.parse()
    finally:
        # Release the file or connection even when parsing fails.
        dataFile.close()

    print(getRdfXml(rdf))
|---|
| 316 | |
|---|
| 317 | |
|---|
| 318 | |
|---|
# Run the converter only when this file is executed as a script.
if __name__ == "__main__":
    main(sys.argv, sys.stdout, os.environ)
|---|