package jena; import java.io.BufferedWriter; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.FileWriter; import java.io.IOException; import java.io.InputStream; import com.hp.hpl.jena.query.Query; import com.hp.hpl.jena.query.QueryExecution; import com.hp.hpl.jena.query.QueryExecutionFactory; import com.hp.hpl.jena.query.QueryFactory; import com.hp.hpl.jena.query.QuerySolution; import com.hp.hpl.jena.query.ResultSet; import com.hp.hpl.jena.query.Syntax; import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.rdf.model.ModelFactory; import com.hp.hpl.jena.rdf.model.Property; import com.hp.hpl.jena.rdf.model.RDFNode; import com.hp.hpl.jena.rdf.model.Resource; import com.hp.hpl.jena.rdf.model.SimpleSelector; import com.hp.hpl.jena.rdf.model.Statement; import com.hp.hpl.jena.rdf.model.StmtIterator; import com.hp.hpl.jena.util.FileManager; import com.hp.hpl.jena.vocabulary.RDF; import de.fuberlin.wiwiss.d2rq.ModelD2RQ; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.xpath.*; import org.w3c.dom.*; import org.apache.commons.httpclient.*; import org.apache.commons.httpclient.methods.*; public class RDFGenerator { private Model model; // Data structure that stores the RDF graph private Model modelBuffer; // Since a model cannot be iterated and modified simultaneously, all new triples are stored in this buffer before being added to the main model. private String queryFile; // Text file containing the SPARQL construct query used to convert the RDB to an RDF graph private String queryName; // Name of construct query private String mappingFile; // Mapping file generated by the D2RQ-script generate mapping private String outputFile; // Output file private String outputFormat; // Serialization format (RDF/XML, N3 Turtle etc.) public static void main (String[] args) { /* Main is called only when this class is run separately (not called from the Menu class) */ if (args.length > 0 && args.length < 5 ) { /* Wrong number of command line arguments. Show usage instructions message and quit. */ System.err.println("Usage: java RDFGenerator [query file] [query name] [mapping file] [output file] [output format]"); return; } else if (args.length == 5) { /* Correct number of command line arguments. Initialize parameters and import data. */ System.out.println("Importing data using command line parameters. "); String queryFile = args[0]; String queryName = args[1]; String mappingFile = args[2]; String outputFile = args[3]; String outputFormat = args[4]; RDFGenerator importData = new RDFGenerator(queryFile, queryName, mappingFile, outputFile, outputFormat); importData.importDataFromRDB(); importData.addAlternateManifestationsOfEachExpression(); importData.identifyEquivalentResourcesInExternalGraph(); return; } else { /* Otherwise, import data using default parameters */ System.out.println("Importing data using default parameters. "); new RDFGenerator("import"); } } public RDFGenerator (String queryFileParameter, String queryNameParameter, String mappingFileParameter, String outputFileParameter, String outputFormatParameter) { /* Constructor called by main. */ model = ModelFactory.createDefaultModel(); modelBuffer = ModelFactory.createDefaultModel(); queryFile = queryFileParameter; queryName = queryNameParameter; mappingFile = mappingFileParameter; outputFile = outputFileParameter; outputFormat = outputFormatParameter; } public RDFGenerator (String method) { /* Constructor called by main and the menu class. */ model = ModelFactory.createDefaultModel(); modelBuffer = ModelFactory.createDefaultModel(); queryFile = "conf/queries.txt"; queryName = "main"; mappingFile = "conf/mapping.n3"; outputFile = "Graph.rdf"; outputFormat = "RDF/XML"; if (method=="import"){ System.out.println("Using default parameters."); importDataFromRDB(); } else if (method=="file"){ readDataFromFile(); } } private void importDataFromRDB () { /* This method is used to initialize the Jena-model by importing data from a relational database, using a D2RQ mapping file and a SPARQL Construct query. */ try { System.out.println("Starting import method."); // Start timer Timer timer = new Timer(); // Create the d2rq model of the relational database using the mapping file Model d2rqModel = new ModelD2RQ(mappingFile); System.out.println("The D2RQ model represents the relational database as a graph of " + d2rqModel.size() + " statements."); // Load SPARQL-query QueryReader queryReader = QueryReader.createQueryReader(queryFile); String queryString = queryReader.getQuery(queryName); // Create the query Query query = QueryFactory.create(queryString, Syntax.syntaxARQ); // Execute the query QueryExecution queryExecution = QueryExecutionFactory.create(query, d2rqModel); model = queryExecution.execConstruct(); System.out.println("Model size - measured in number of triples: " + model.size()); // Stop timer timer.stop(); fixNamespacePrefixes(); // Serialize resulting graph writeModelToFile(); // Update log file logQueryExecution(queryString, timer.getStartTime(), timer.getTotalTime()); } catch (Exception e) { System.out.println("Something went wrong with the import! \n" + "If you want to try again, please set up a new SSH-tunnell to the Biblab-server. \n"); return; } System.out.println("Import finished successfully."); // Calling method that adapts the graph to the FRBR ontology. adaptDataToFRBROntology(); } public void adaptDataToFRBROntology () { /* This method is used to adapt the data to the FRBR model by adding manifestations of expressions and moving the isbn property from the expression level to the manifestation level. */ System.out.println("Adapting data to the FRBR model. "); writeModelToFile(model, "GraphBeforeFRBRizing.n3", "N3"); // Start timer Timer timer = new Timer(); Resource expression = null; Resource manifestation = null; Resource review = null; Resource tagging = null; Resource description = null; String isbn = null; Resource manifestationClass = model.createResource("http://purl.org/vocab/frbr/core#Manifestation"); Property isbnProperty = model.createProperty("http://purl.org/ontology/bibo/", "isbn"); Property isbn10Property = model.createProperty("http://purl.org/ontology/bibo/", "isbn10"); Property isbn13Property = model.createProperty("http://purl.org/ontology/bibo/", "isbn13"); Property embodimentProperty = model.createProperty("http://purl.org/vocab/frbr/core#", "embodiment"); Property embodimentOfProperty = model.createProperty("http://purl.org/vocab/frbr/core#", "embodimentOf"); Property hasReviewProperty = model.createProperty("http://purl.org/stuff/rev#", "hasReview"); Property reviewsProperty = model.createProperty("http://openlibraryproject.no/ontology#", "reviews"); Property taggedResourceProperty = model.createProperty("http://www.holygoat.co.uk/owl/redwood/0.1/tags/", "taggedResource"); Property hasTaggingProperty = model.createProperty("http://openlibraryproject.no/ontology#", "hasTagging"); Property hasDescriptionProperty = model.createProperty("http://openlibraryproject.no/ontology#", "hasDescription"); Property describesProperty = model.createProperty("http://openlibraryproject.no/ontology#", "describes"); Statement isbnStatementToBeRemoved = null; Statement hasReviewStatementToBeRemoved = null; Statement reviewsStatementToBeRemoved = null; Statement taggedResourceStatementToBeRemoved = null; Statement hasTaggingStatementToBeRemoved = null; Statement hasDescriptionStatementToBeRemoved = null; Statement describesStatementToBeRemoved = null; Model modelOfStatementsToBeRemoved = ModelFactory.createDefaultModel(); // Find expression resources in the model and their property values String queryString = "PREFIX bibo: " + "PREFIX rev: " + "PREFIX tag: " + "PREFIX olp: " + "SELECT ?expression ?isbn ?review ?tagging ?description " + "WHERE {" + " ?expression bibo:isbn ?isbn . " + " OPTIONAL { ?expression rev:hasReview ?review . } " + " OPTIONAL { ?expression olp:hasTagging ?tagging . } " + " OPTIONAL { ?expression olp:hasDescription ?description . } " + " }" ; Query query = QueryFactory.create(queryString); // Execute the query and obtain results QueryExecution queryExecution = QueryExecutionFactory.create(query, model); try { ResultSet resultSet = queryExecution.execSelect() ; while (resultSet.hasNext()) { QuerySolution querySolution = resultSet.nextSolution(); isbn = querySolution.getLiteral("isbn").toString(); if (querySolution.getResource("expression")!=null) { expression = modelBuffer.createResource(querySolution.getResource("expression").getURI()); } if (querySolution.getResource("review")!=null) { review = modelBuffer.createResource(querySolution.getResource("review").getURI()); } if (querySolution.getResource("tagging")!=null) { tagging = modelBuffer.createResource(querySolution.getResource("tagging").getURI()); } if (querySolution.getResource("description")!=null) { description = modelBuffer.createResource(querySolution.getResource("description").getURI()); } // Create a new manifestation resource manifestation = modelBuffer.createResource("http://openlibraryproject.no/resource/manifestation/"+isbn); // Add RDF class reference manifestation.addProperty(RDF.type, manifestationClass); // Create expression - manifestation relations expression.addProperty(embodimentProperty, manifestation); manifestation.addProperty(embodimentOfProperty, expression); // Remove isbn property from expression isbnStatementToBeRemoved = modelOfStatementsToBeRemoved.createStatement(expression, isbnProperty, isbn); modelOfStatementsToBeRemoved.add(isbnStatementToBeRemoved); // Add isbn property to manifestation manifestation.addProperty(isbnProperty, isbn); if (isbn.length()==10){ // Add isbn10 property to manifestation manifestation.addProperty(isbn10Property, isbn); } else if (isbn.length()==13){ // Add isbn13 property to manifestation manifestation.addProperty(isbn13Property, isbn); } if (review != null) { // Remove hasReview property from expression hasReviewStatementToBeRemoved = modelOfStatementsToBeRemoved.createStatement(expression, hasReviewProperty, review); modelOfStatementsToBeRemoved.add(hasReviewStatementToBeRemoved); // Add hasAssesment property to manifestation manifestation.addProperty(hasReviewProperty, review); // Remove assessmentOf property from expression reviewsStatementToBeRemoved = modelOfStatementsToBeRemoved.createStatement(review, reviewsProperty, expression); modelOfStatementsToBeRemoved.add(reviewsStatementToBeRemoved); // Add assessmentOf property to manifestation review.addProperty(reviewsProperty, manifestation ); } if (tagging != null) { // Remove tags property from expression taggedResourceStatementToBeRemoved = modelOfStatementsToBeRemoved.createStatement(tagging, taggedResourceProperty, expression); modelOfStatementsToBeRemoved.add(taggedResourceStatementToBeRemoved); // Add tags property to manifestation tagging.addProperty(taggedResourceProperty, manifestation); // Remove hasTagging property from expression hasTaggingStatementToBeRemoved = modelOfStatementsToBeRemoved.createStatement(expression, hasTaggingProperty, tagging); modelOfStatementsToBeRemoved.add(hasTaggingStatementToBeRemoved); // Add hasTagging property to manifestation manifestation.addProperty(hasTaggingProperty, tagging); } if (description != null) { // Remove hasDescription property from expression hasDescriptionStatementToBeRemoved = modelOfStatementsToBeRemoved.createStatement(expression, hasDescriptionProperty, description); modelOfStatementsToBeRemoved.add(hasDescriptionStatementToBeRemoved); // Add hasDescription property to manifestation manifestation.addProperty(hasDescriptionProperty, description); // Remove describes property from expression describesStatementToBeRemoved = modelOfStatementsToBeRemoved.createStatement(description, describesProperty, expression); modelOfStatementsToBeRemoved.add(describesStatementToBeRemoved); // Add describes property to manifestation description.addProperty(describesProperty, manifestation); } } } finally { queryExecution.close(); } System.out.println("Model size before adding new triples: " + model.size()); // Add new triples to the model model.add(modelBuffer); System.out.println("Model size after adding new triples: " + model.size()); // Remove out dated triples from model model.remove(modelOfStatementsToBeRemoved); System.out.println("Model size after removing unwanted triples: " + model.size()); // Stop timer timer.stop(); // Serialize model writeModelToFile(); // Empty buffer models modelOfStatementsToBeRemoved.removeAll(); modelBuffer.removeAll(); } public void addAlternateManifestationsOfEachExpression () { /* This method is used to add alternate manifestations of each expression by calling an external web service. This enables libraries searching for assessments of a book to get relevant information back, even if their catalog post refer to a different edition than the one(s) users have assessed. */ System.out.println("Contacting WorldCat's xISBN webservice in order to find alternate manifestations of each expression. "); // Start timer Timer timer = new Timer(); // Find all manifestations and their corresponding ISBN Property isbnProperty = model.getProperty("http://purl.org/ontology/bibo/", "isbn"); StmtIterator i = model.listStatements(new SimpleSelector(null, isbnProperty, (RDFNode) null)); while (i.hasNext()){ Statement statement = i.nextStatement(); String isbn = statement.getObject().toString(); // Find related expression Resource expression = getRelatedExpression(isbn); if (expression != null) { xIsbnLookUp(expression, isbn); } } // Add new triples to model model = model.add(modelBuffer); modelBuffer.removeAll(); System.out.println("Model size after processing: " + model.size()); // Stop timer timer.stop(); // Serialize model writeModelToFile(); } private Resource getRelatedExpression (String isbn){ /* This method is used to return the expression resource related to a manifestation resource, based on its ISBN. */ Resource expression=null; //Find the related expression String queryString = "PREFIX frbr: " + "PREFIX bibo: " + "SELECT ?expression " + "WHERE {" + " ?expression a frbr:Expression . " + " ?expression frbr:embodiment ?manifestation . " + " ?manifestation bibo:isbn \"" + isbn + "\" . " + " }" ; Query query = QueryFactory.create(queryString); // Execute the query and obtain results QueryExecution queryExecution = QueryExecutionFactory.create(query, model); ResultSet results = queryExecution.execSelect(); if (results.hasNext()){ QuerySolution querySolution = results.nextSolution(); expression = querySolution.getResource("expression"); } else{ return expression = null; } return expression; } public void xIsbnLookUp (Resource expression, String isbn) { /* This method is used to contact WorldCat's xISBN web service in order to identify alternative manifestations of each expression and add these to the local graph. The web service call uses the ISBN of a manifestation resource already in the local graph. The web service returns an XML containing ISBNs of alternative manifestations. These manifestations are then added to the local graph as new resources with corresponding ISBN literals. The web service call is based on the tutorial: http://developer.yahoo.com/java/howto-reqRestJava.html */ String request = "http://xisbn.worldcat.org/webservices/xid/isbn/" + isbn + "?method=getEditions&format=xml&fl=form,year,lang,ed"; HttpClient client = new HttpClient(); GetMethod method = new GetMethod(request); // Send GET request int statusCode; try { statusCode = client.executeMethod(method); if (statusCode != HttpStatus.SC_OK) { System.err.println("Method failed: " + method.getStatusLine()); } else { InputStream resultStream = null; // Get the response body resultStream = method.getResponseBodyAsStream(); // Source: http://developer.yahoo.com/java/howto-parseRestJava.html // Process response Document response = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(resultStream); XPathFactory factory = XPathFactory.newInstance(); XPath xPath=factory.newXPath(); // Get all ISBN nodes NodeList nodes = (NodeList)xPath.evaluate("/rsp/isbn", response, XPathConstants.NODESET); int nodeCount = nodes.getLength(); if (nodeCount > 0){ System.out.println("Found " + nodeCount + " manifestations of expression " + expression.toString() + " by calling the xISBN web service. "); // Iterate over ISBN nodes for (int i = 0; i < nodeCount; i++) { //Get each xpath expression as a string String alternateISBN = (String)xPath.evaluate(".", nodes.item(i), XPathConstants.STRING); addNewManifestationOfExpression(expression, alternateISBN); } } } } catch (Exception e) { e.printStackTrace(); } } private void addNewManifestationOfExpression (Resource expression, String isbn){ /* This method is used to add a new manifestation resource and link it to the corresponding expression resource. */ Resource manifestation = modelBuffer.createResource("http://openlibraryproject.no/resource/manifestation/" + isbn); String frbrPropertyUri = "http://purl.org/vocab/frbr/core#"; Property embodimentOf = modelBuffer.createProperty(frbrPropertyUri,"embodimentOf"); Property embodiment = modelBuffer.createProperty(frbrPropertyUri,"embodiment"); manifestation.addProperty(embodimentOf, expression); expression.addProperty(embodiment, manifestation); Property isbnProperty = modelBuffer.createProperty("http://purl.org/ontology/bibo/", "isbn"); manifestation.addProperty(isbnProperty, isbn); if (isbn.length()==10){ // Add ISBN10 Property isbn10Property = modelBuffer.createProperty("http://purl.org/ontology/bibo/", "isbn10"); manifestation.addProperty(isbn10Property, isbn); } else if (isbn.length()==13){ // Add ISBN10 Property isbn13Property = modelBuffer.createProperty("http://purl.org/ontology/bibo/", "isbn13"); manifestation.addProperty(isbn13Property, isbn); } // Add RDF class reference Resource typeResource = modelBuffer.createResource("http://purl.org/vocab/frbr/core#Manifestation"); manifestation.addProperty(RDF.type, typeResource); } public void identifyEquivalentResourcesInExternalGraph() { /* This method is used to find the ISBN of all manifestation resources in the local graph and call a method that identifies and adds links to equivalent external resources. */ System.out.println("Looking for equivalent resources in openlibrary.org's graph by contacting Talis SPARQL endpoint."); // Start timer Timer timer = new Timer(); Resource manifestation = null; String isbn = null; // Find manifestation resources in the model and their related ISBNs String queryString = "PREFIX bibo: " + "SELECT ?manifestation ?isbn " + "WHERE {" + " ?manifestation bibo:isbn ?isbn . " + " }" ; Query query = QueryFactory.create(queryString); // Execute the query and obtain results QueryExecution queryExecution = QueryExecutionFactory.create(query, model); try { ResultSet results = queryExecution.execSelect() ; while (results.hasNext()) { QuerySolution querySolution = results.nextSolution() ; manifestation = querySolution.getResource("manifestation") ; isbn = querySolution.getLiteral("isbn").toString(); // Connect to Talis addLinksToEquivalentResources (manifestation, isbn); } } finally { queryExecution.close(); } //Add new triples to the model model.add(modelBuffer); modelBuffer.removeAll(); System.out.println("Model size after processing: " + model.size()); // Stop timer timer.stop(); // Serialize model writeModelToFile(); } public void addLinksToEquivalentResources (Resource manifestationResource, String isbn) { /* This method is used to identify equivalent resources in Openlibrary.org's RDF-graph based on ISBN. It then calls a method that adds sameAs statements to these resources in the local graph. */ String endpoint = "http://api.talis.com/stores/openlibrary/services/sparql"; String queryString = " PREFIX bibo: " + " SELECT ?book " + " WHERE " + " { " + " { ?book bibo:isbn13 '" + isbn + "' . } " + " UNION " + " { ?book bibo:isbn10 '" + isbn + "' . } " + " } "; Query query = QueryFactory.create(queryString); QueryExecution queryExecution = QueryExecutionFactory.sparqlService(endpoint, query); try { ResultSet resultSet = queryExecution.execSelect() ; if (resultSet.hasNext()){ System.out.println("Found equivalent resource(s) to " + manifestationResource.getURI() + " in Openlibrary.org's graph"); while (resultSet.hasNext()) { QuerySolution querySolution = resultSet.nextSolution(); Resource openLibraryResource = querySolution.getResource("book"); addSameAsStatements (manifestationResource, openLibraryResource); } } } finally{ queryExecution.close(); } } private void addSameAsStatements (Resource existingResource, Resource newResource){ /* This method is used to add sameAs statements between resources. */ String propertyUri = "http://www.w3.org/2002/07/owl#"; Property sameAs = modelBuffer.createProperty(propertyUri,"sameAs"); Statement sameAsStatement = modelBuffer.createStatement(newResource, sameAs, existingResource); Statement sameAsStatementReversed = modelBuffer.createStatement(existingResource, sameAs, newResource); modelBuffer.add(sameAsStatement); modelBuffer.add(sameAsStatementReversed); } public void readDataFromFile (){ /* This method is used to initialize the Jena-model from file instead of importing data from a relational database.*/ System.out.println("Reading data from " + outputFile + "."); // use the FileManager to find the input file InputStream in = FileManager.get().open(outputFile); if (in == null) { throw new IllegalArgumentException("File: " + outputFile + " not found"); } // read the RDF/XML file model.read(in, null); System.out.println("Model size: " + model.size()); } void writeModelToFile () { /* This method is used to serialize the Jena-model to file. */ System.out.println("Writing model to file: " + outputFile + "."); FileOutputStream outputStream; try { outputStream = new FileOutputStream(outputFile); model.write(outputStream, outputFormat); } catch (FileNotFoundException e) { e.printStackTrace(); } } void writeModelToFile (Model model, String outputFile, String outputFormat) { /* This method is used to serialize a model to file. */ System.out.println("Writing model to file: " + outputFile + "."); FileOutputStream outputStream; try { outputStream = new FileOutputStream(outputFile); model.write(outputStream, outputFormat); } catch (FileNotFoundException e) { e.printStackTrace(); } } private void fixNamespacePrefixes() { // Set namespace prefixes for pretty XML print model.setNsPrefix("olp", "http://openlibraryproject.no/ontology#"); model.setNsPrefix("frbr", "http://purl.org/vocab/frbr/core#"); model.setNsPrefix("foaf", "http://xmlns.com/foaf/0.1/"); model.setNsPrefix("tag", "http://www.holygoat.co.uk/owl/redwood/0.1/tags/"); model.setNsPrefix("rev", "http://purl.org/stuff/rev#"); modelBuffer.setNsPrefix("olp", "http://openlibraryproject.no/ontology#"); modelBuffer.setNsPrefix("frbr", "http://purl.org/vocab/frbr/core#"); modelBuffer.setNsPrefix("foaf", "http://xmlns.com/foaf/0.1/"); modelBuffer.setNsPrefix("tag", "http://www.holygoat.co.uk/owl/redwood/0.1/tags/"); modelBuffer.setNsPrefix("rev", "http://purl.org/stuff/rev#"); // Remove unused namespace prefixes added by D2RQ model.removeNsPrefix("jdbc"); model.removeNsPrefix("map"); model.removeNsPrefix("vocab"); model.removeNsPrefix("jdbc"); model.removeNsPrefix("db"); model.removeNsPrefix("owl"); model.removeNsPrefix("rdfs"); model.removeNsPrefix("xsd"); } private void logQueryExecution (String queryString, String startTime, String totalTime) { /* This method is used to log the results of each import to a log file. */ try { BufferedWriter log = new BufferedWriter(new FileWriter("Log.txt", true)); log.newLine(); log.write("Timestamp: " + startTime + "\n"); log.write("Parameters: " + queryFile + " " + queryName + " " + mappingFile + " " + outputFile + " " + outputFormat + " \n"); log.write("Query: " + "\n"); log.write(queryString + "\n"); log.write("Model size measured in number of triples: " + model.size() + "\n"); log.write("Execution time: " + totalTime + "\n \n"); log.write("*****************************************" + "\n \n"); log.close(); } catch (IOException e) { System.out.println("Exception "); } } }