001    /***************************************************************************/
002    /*  Copyright (C) 2010-2011, Sebastian Hellmann                            */
003    /*  Note: If you need parts of NLP2RDF in another licence due to licence   */
004    /*  incompatibility, please mail hellmann@informatik.uni-leipzig.de        */
005    /*                                                                         */
006    /*  This file is part of NLP2RDF.                                          */
007    /*                                                                         */
008    /*  NLP2RDF is free software; you can redistribute it and/or modify        */
009    /*  it under the terms of the GNU General Public License as published by   */
010    /*  the Free Software Foundation; either version 3 of the License, or      */
011    /*  (at your option) any later version.                                    */
012    /*                                                                         */
013    /*  NLP2RDF is distributed in the hope that it will be useful,             */
014    /*  but WITHOUT ANY WARRANTY; without even the implied warranty of         */
015    /*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the           */
016    /*  GNU General Public License for more details.                           */
017    /*                                                                         */
018    /*  You should have received a copy of the GNU General Public License      */
019    /*  along with this program. If not, see <http://www.gnu.org/licenses/>.   */
020    /***************************************************************************/
021    
022    package org.nlp2rdf.core;
023    
024    import com.hp.hpl.jena.ontology.OntModel;
025    import com.hp.hpl.jena.ontology.OntModelSpec;
026    import com.hp.hpl.jena.rdf.model.ModelFactory;
027    import com.jamonapi.Monitor;
028    import com.jamonapi.MonitorFactory;
029    import eu.lod2.nlp2rdf.schema.sso.Phrase;
030    import eu.lod2.nlp2rdf.schema.sso.Sentence;
031    import eu.lod2.nlp2rdf.schema.sso.Word;
032    import eu.lod2.nlp2rdf.schema.str.Document;
033    import eu.lod2.nlp2rdf.schema.str.IString;
034    import eu.lod2.nlp2rdf.schema.tools.Factory;
035    import opennlp.tools.util.Span;
036    import org.nlp2rdf.core.util.URIGeneratorHelper;
037    import org.slf4j.Logger;
038    import org.slf4j.LoggerFactory;
039    
040    import java.util.*;
041    
042    /**
043     * User: Sebastian Hellmann - http://bis.informatik.uni-leipzig.de/SebastianHellmann
044     */
045    public class Text2RDF {
046        private static Logger log = LoggerFactory.getLogger(Text2RDF.class);
047    
048        public static final String stringOntologyUrl = "http://nlp2rdf.lod2.eu/schema/string/";
049        public static final String structuredSentenceOntologyUrl = "http://nlp2rdf.lod2.eu/schema/sso/";
050    
051        static {
052            Factory.registerCustomClasses();
053        }
054    
055    
056        public Document createDocumentAnnotation(String prefix, String text, URIGenerator uriGenerator, OntModel in) {
057            //make the uri and add the class for the URI recipe
058            String documentUri = uriGenerator.makeUri(prefix, text, new Span(0, text.length()), in);
059            //wrap it in a document
060            Document d = Document.create(documentUri, in);
061            d.setSourceString(text);
062            return d;
063        }
064    
065        public Word createWordAnnotation(String prefix, String text, Span span, URIGenerator uriGenerator, OntModel in) {
066            //make the uri and add the class for the URI recipe
067            String wordUri = uriGenerator.makeUri(prefix, text, span, in);
068            Word w = Word.create(wordUri, in);
069            w.setAnchorOf(span.getCoveredText(text).toString());
070            return w;
071        }
072    
073        public Phrase createPhraseAnnotation(String prefix, String text, Span span, URIGenerator uriGenerator, OntModel in) {
074            //make the uri and add the class for the URI recipe
075            String wordUri = uriGenerator.makeUri(prefix, text, span, in);
076            Phrase p = Phrase.create(wordUri, in);
077            p.setAnchorOf(span.getCoveredText(text).toString());
078            return p;
079        }
080    
081        public Sentence createSentenceAnnotation(String prefix, String text, Span span, URIGenerator uriGenerator, OntModel in) {
082            //make the uri and add the class for the URI recipe
083            String wordUri = uriGenerator.makeUri(prefix, text, span, in);
084            Sentence s = Sentence.create(wordUri, in);
085            s.setAnchorOf(span.getCoveredText(text).toString());
086            return s;
087        }
088    
089        public void addSSOproperties(String prefix, String text, URIGenerator uriGenerator, OntModel m) {
090            List<IString> sentences = new ArrayList<IString>(Sentence.list(m));
091            List<Span> spans = URIGeneratorHelper.getSpans(sentences, prefix, text, uriGenerator);
092            Collections.sort(spans, new Comparator<Span>() {
093                @Override
094                public int compare(Span span, Span span1) {
095                    return span.compareTo(span1);
096                }
097            });
098        }
099    
100    
101    
102       /* public OntModel processAsDocument(String prefix, String text, Tokenizer tokenizer, URIGenerator uriGenerator) {
103    
104            //generate the NIF model
105            OntModel ret = process(prefix, text, tokenizer, uriGenerator);
106            Document d = createDocumentAnnotation(prefix, text, uriGenerator, ret);
107            List<Sentence> sentenceList = Sentence.list(ret);
108            for (Sentence s : sentenceList) {
109                //assign a connection
110                d.addSubString(s);
111            }
112            return ret;
113    
114        }*/
115    
116        /**
117         * returns a basic model for the sentence
118         *
119         * @return
120         */
121        @Deprecated
122        public OntModel process(String prefix, String text, Tokenizer tokenizer, URIGenerator uriGenerator) {
123            assert tokenizer != null && text != null && uriGenerator != null && prefix != null;
124            OntModel ret = ModelFactory.createOntologyModel(OntModelSpec.OWL_DL_MEM, ModelFactory.createDefaultModel());
125            ret.setNsPrefix("sso", structuredSentenceOntologyUrl);
126            ret.setNsPrefix("string", stringOntologyUrl);
127            List<SentencePOJO> sentencePOJOs = POJOMaker.makePOJOs(text, tokenizer);
128            uriGenerator.init(text, SentencePOJO.getSpans(sentencePOJOs));
129            sentencePOJOs2OWL(prefix, text, sentencePOJOs, ret, uriGenerator);
130            //SaveOntology.saveAsTurtle(ret, "last.ttl");
131            //SaveOntology.saveAsRDFXML(ret, "last.owl");
132            return ret;
133        }
134    
135        /**
136         * Generates the NIF RDF and fills the supplied model.
137         * Also the URIs are set in the POJOs, so there is a modification of the parameter objects, i.e. the function is not transparent.
138         *
139         * @param prefix
140         * @param text
141         * @param sentencePOJOs
142         * @param model         The model which should be filled during
143         * @param uriGenerator
144         */
145        @Deprecated
146        public void sentencePOJOs2OWL(String prefix, String text, List<? extends SentencePOJO> sentencePOJOs, OntModel model, URIGenerator uriGenerator) {
147            for (SentencePOJO sentencePOJO : sentencePOJOs) {
148                Monitor mon = MonitorFactory.getTimeMonitor("sentencePOJO2OWL").start();
149    
150                //generate a sentence uri
151                String sentenceUri = uriGenerator.makeUri(prefix, text, sentencePOJO.getSpan());
152    
153                //add the class for the URI recipe
154                uriGenerator.assignRecipeClass(sentenceUri, model);
155    
156                // add sentence class, label and hasSource property
157                Sentence sentence = Sentence.create(sentenceUri, model);
158                sentence.setAnchorOf(sentencePOJO.getText());
159    
160                //set the uri and sentence in the POJOs
161                sentencePOJO.setUri(sentenceUri);
162                sentencePOJO.setSentence(sentence);
163    
164                List<WordPOJO> wordPOJOs = sentencePOJO.getWordPOJOs();
165    
166                for (int pos = 0; pos < wordPOJOs.size(); pos++) {
167                    String wordUri = uriGenerator.makeUri(prefix, text, wordPOJOs.get(pos).getSpan());
168    
169                    //add the class for the URI recipe
170                    uriGenerator.assignRecipeClass(wordUri, model);
171    
172                    Word word = Word.create(wordUri, model);
173                    // connect sentence and token
174                    sentence.addWord(word);
175    
176                    //add uri and word to POJO
177                    wordPOJOs.get(pos).setUri(wordUri);
178                    wordPOJOs.get(pos).setWord(word);
179    
180                    boolean first = (pos == 0);
181                    boolean last = (pos == wordPOJOs.size() - 1);
182    
183                    // firstWord and lastWord
184                    if (first) {
185                        sentence.setFirstWord(word);
186                    } else if (last) {
187                        sentence.setLastWord(word);
188                    }
189                    // add info to token
190                    word.setAnchorOf(wordPOJOs.get(pos).getText());
191    
192                    if (wordPOJOs.size() == 1) {
193                        continue;
194                    }
195    
196                    // next/previous
197                    //TODO optimize URIgeneration can be faster here it is repeated quite often
198                    if (first) {
199                        Word nextWord = Word.create(uriGenerator.makeUri(prefix, text, wordPOJOs.get(pos + 1).getSpan()), model);
200                        word.setNextWord(nextWord);
201                    } else if (last) {
202                        //Word previousWord = Word.create(uriGenerator.makeUri(prefix, S, text, wordPOJOs.get(pos - 1).getSpan()), model);
203                        //word.setNextWord(previousWord);
204                    } else {
205                        Word nextWord = Word.create(uriGenerator.makeUri(prefix, text, wordPOJOs.get(pos + 1).getSpan()), model);
206                        word.setNextWord(nextWord);
207                        //Word previousWord = Word.create(uriGenerator.makeUri(prefix, S, text, wordPOJOs.get(pos - 1).getSpan()), model);
208                        //word.setPreviousWord(previousWord);
209                    }
210    
211                }// end for
212    
213                mon.stop();
214                log.debug("Finished conversion of POJOs to RDF (" + sentencePOJO.getWordPOJOs().size() + " words, " + model.size() + " triples total, " + mon.getLastValue() + " ms.): " + sentencePOJO.getText());
215            }
216        }
217    
218    }