001 /***************************************************************************/
002 /* Copyright (C) 2010-2011, Sebastian Hellmann */
003 /* Note: If you need parts of NLP2RDF in another licence due to licence */
004 /* incompatibility, please mail hellmann@informatik.uni-leipzig.de */
005 /* */
006 /* This file is part of NLP2RDF. */
007 /* */
008 /* NLP2RDF is free software; you can redistribute it and/or modify */
009 /* it under the terms of the GNU General Public License as published by */
010 /* the Free Software Foundation; either version 3 of the License, or */
011 /* (at your option) any later version. */
012 /* */
013 /* NLP2RDF is distributed in the hope that it will be useful, */
014 /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
015 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
016 /* GNU General Public License for more details. */
017 /* */
018 /* You should have received a copy of the GNU General Public License */
019 /* along with this program. If not, see <http://www.gnu.org/licenses/>. */
020 /***************************************************************************/
021
022 package org.nlp2rdf.core;
023
024 import com.hp.hpl.jena.ontology.OntModel;
025 import com.hp.hpl.jena.ontology.OntModelSpec;
026 import com.hp.hpl.jena.rdf.model.ModelFactory;
027 import com.jamonapi.Monitor;
028 import com.jamonapi.MonitorFactory;
029 import eu.lod2.nlp2rdf.schema.sso.Phrase;
030 import eu.lod2.nlp2rdf.schema.sso.Sentence;
031 import eu.lod2.nlp2rdf.schema.sso.Word;
032 import eu.lod2.nlp2rdf.schema.str.Document;
033 import eu.lod2.nlp2rdf.schema.str.IString;
034 import eu.lod2.nlp2rdf.schema.tools.Factory;
035 import opennlp.tools.util.Span;
036 import org.nlp2rdf.core.util.URIGeneratorHelper;
037 import org.slf4j.Logger;
038 import org.slf4j.LoggerFactory;
039
040 import java.util.*;
041
042 /**
043 * User: Sebastian Hellmann - http://bis.informatik.uni-leipzig.de/SebastianHellmann
044 */
045 public class Text2RDF {
046 private static Logger log = LoggerFactory.getLogger(Text2RDF.class);
047
048 public static final String stringOntologyUrl = "http://nlp2rdf.lod2.eu/schema/string/";
049 public static final String structuredSentenceOntologyUrl = "http://nlp2rdf.lod2.eu/schema/sso/";
050
051 static {
052 Factory.registerCustomClasses();
053 }
054
055
056 public Document createDocumentAnnotation(String prefix, String text, URIGenerator uriGenerator, OntModel in) {
057 //make the uri and add the class for the URI recipe
058 String documentUri = uriGenerator.makeUri(prefix, text, new Span(0, text.length()), in);
059 //wrap it in a document
060 Document d = Document.create(documentUri, in);
061 d.setSourceString(text);
062 return d;
063 }
064
065 public Word createWordAnnotation(String prefix, String text, Span span, URIGenerator uriGenerator, OntModel in) {
066 //make the uri and add the class for the URI recipe
067 String wordUri = uriGenerator.makeUri(prefix, text, span, in);
068 Word w = Word.create(wordUri, in);
069 w.setAnchorOf(span.getCoveredText(text).toString());
070 return w;
071 }
072
073 public Phrase createPhraseAnnotation(String prefix, String text, Span span, URIGenerator uriGenerator, OntModel in) {
074 //make the uri and add the class for the URI recipe
075 String wordUri = uriGenerator.makeUri(prefix, text, span, in);
076 Phrase p = Phrase.create(wordUri, in);
077 p.setAnchorOf(span.getCoveredText(text).toString());
078 return p;
079 }
080
081 public Sentence createSentenceAnnotation(String prefix, String text, Span span, URIGenerator uriGenerator, OntModel in) {
082 //make the uri and add the class for the URI recipe
083 String wordUri = uriGenerator.makeUri(prefix, text, span, in);
084 Sentence s = Sentence.create(wordUri, in);
085 s.setAnchorOf(span.getCoveredText(text).toString());
086 return s;
087 }
088
089 public void addSSOproperties(String prefix, String text, URIGenerator uriGenerator, OntModel m) {
090 List<IString> sentences = new ArrayList<IString>(Sentence.list(m));
091 List<Span> spans = URIGeneratorHelper.getSpans(sentences, prefix, text, uriGenerator);
092 Collections.sort(spans, new Comparator<Span>() {
093 @Override
094 public int compare(Span span, Span span1) {
095 return span.compareTo(span1);
096 }
097 });
098 }
099
100
101
102 /* public OntModel processAsDocument(String prefix, String text, Tokenizer tokenizer, URIGenerator uriGenerator) {
103
104 //generate the NIF model
105 OntModel ret = process(prefix, text, tokenizer, uriGenerator);
106 Document d = createDocumentAnnotation(prefix, text, uriGenerator, ret);
107 List<Sentence> sentenceList = Sentence.list(ret);
108 for (Sentence s : sentenceList) {
109 //assign a connection
110 d.addSubString(s);
111 }
112 return ret;
113
114 }*/
115
116 /**
117 * returns a basic model for the sentence
118 *
119 * @return
120 */
121 @Deprecated
122 public OntModel process(String prefix, String text, Tokenizer tokenizer, URIGenerator uriGenerator) {
123 assert tokenizer != null && text != null && uriGenerator != null && prefix != null;
124 OntModel ret = ModelFactory.createOntologyModel(OntModelSpec.OWL_DL_MEM, ModelFactory.createDefaultModel());
125 ret.setNsPrefix("sso", structuredSentenceOntologyUrl);
126 ret.setNsPrefix("string", stringOntologyUrl);
127 List<SentencePOJO> sentencePOJOs = POJOMaker.makePOJOs(text, tokenizer);
128 uriGenerator.init(text, SentencePOJO.getSpans(sentencePOJOs));
129 sentencePOJOs2OWL(prefix, text, sentencePOJOs, ret, uriGenerator);
130 //SaveOntology.saveAsTurtle(ret, "last.ttl");
131 //SaveOntology.saveAsRDFXML(ret, "last.owl");
132 return ret;
133 }
134
135 /**
136 * Generates the NIF RDF and fills the supplied model.
137 * Also the URIs are set in the POJOs, so there is a modification of the parameter objects, i.e. the function is not transparent.
138 *
139 * @param prefix
140 * @param text
141 * @param sentencePOJOs
142 * @param model The model which should be filled during
143 * @param uriGenerator
144 */
145 @Deprecated
146 public void sentencePOJOs2OWL(String prefix, String text, List<? extends SentencePOJO> sentencePOJOs, OntModel model, URIGenerator uriGenerator) {
147 for (SentencePOJO sentencePOJO : sentencePOJOs) {
148 Monitor mon = MonitorFactory.getTimeMonitor("sentencePOJO2OWL").start();
149
150 //generate a sentence uri
151 String sentenceUri = uriGenerator.makeUri(prefix, text, sentencePOJO.getSpan());
152
153 //add the class for the URI recipe
154 uriGenerator.assignRecipeClass(sentenceUri, model);
155
156 // add sentence class, label and hasSource property
157 Sentence sentence = Sentence.create(sentenceUri, model);
158 sentence.setAnchorOf(sentencePOJO.getText());
159
160 //set the uri and sentence in the POJOs
161 sentencePOJO.setUri(sentenceUri);
162 sentencePOJO.setSentence(sentence);
163
164 List<WordPOJO> wordPOJOs = sentencePOJO.getWordPOJOs();
165
166 for (int pos = 0; pos < wordPOJOs.size(); pos++) {
167 String wordUri = uriGenerator.makeUri(prefix, text, wordPOJOs.get(pos).getSpan());
168
169 //add the class for the URI recipe
170 uriGenerator.assignRecipeClass(wordUri, model);
171
172 Word word = Word.create(wordUri, model);
173 // connect sentence and token
174 sentence.addWord(word);
175
176 //add uri and word to POJO
177 wordPOJOs.get(pos).setUri(wordUri);
178 wordPOJOs.get(pos).setWord(word);
179
180 boolean first = (pos == 0);
181 boolean last = (pos == wordPOJOs.size() - 1);
182
183 // firstWord and lastWord
184 if (first) {
185 sentence.setFirstWord(word);
186 } else if (last) {
187 sentence.setLastWord(word);
188 }
189 // add info to token
190 word.setAnchorOf(wordPOJOs.get(pos).getText());
191
192 if (wordPOJOs.size() == 1) {
193 continue;
194 }
195
196 // next/previous
197 //TODO optimize URIgeneration can be faster here it is repeated quite often
198 if (first) {
199 Word nextWord = Word.create(uriGenerator.makeUri(prefix, text, wordPOJOs.get(pos + 1).getSpan()), model);
200 word.setNextWord(nextWord);
201 } else if (last) {
202 //Word previousWord = Word.create(uriGenerator.makeUri(prefix, S, text, wordPOJOs.get(pos - 1).getSpan()), model);
203 //word.setNextWord(previousWord);
204 } else {
205 Word nextWord = Word.create(uriGenerator.makeUri(prefix, text, wordPOJOs.get(pos + 1).getSpan()), model);
206 word.setNextWord(nextWord);
207 //Word previousWord = Word.create(uriGenerator.makeUri(prefix, S, text, wordPOJOs.get(pos - 1).getSpan()), model);
208 //word.setPreviousWord(previousWord);
209 }
210
211 }// end for
212
213 mon.stop();
214 log.debug("Finished conversion of POJOs to RDF (" + sentencePOJO.getWordPOJOs().size() + " words, " + model.size() + " triples total, " + mon.getLastValue() + " ms.): " + sentencePOJO.getText());
215 }
216 }
217
218 }