001    /***************************************************************************/
002    /*  Copyright (C) 2010-2011, Sebastian Hellmann                            */
003    /*  Note: If you need parts of NLP2RDF in another licence due to licence   */
004    /*  incompatibility, please mail hellmann@informatik.uni-leipzig.de        */
005    /*                                                                         */
006    /*  This file is part of NLP2RDF.                                          */
007    /*                                                                         */
008    /*  NLP2RDF is free software; you can redistribute it and/or modify        */
009    /*  it under the terms of the GNU General Public License as published by   */
010    /*  the Free Software Foundation; either version 3 of the License, or      */
011    /*  (at your option) any later version.                                    */
012    /*                                                                         */
013    /*  NLP2RDF is distributed in the hope that it will be useful,             */
014    /*  but WITHOUT ANY WARRANTY; without even the implied warranty of         */
015    /*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the           */
016    /*  GNU General Public License for more details.                           */
017    /*                                                                         */
018    /*  You should have received a copy of the GNU General Public License      */
019    /*  along with this program. If not, see <http://www.gnu.org/licenses/>.   */
020    /***************************************************************************/
021    
022    package org.nlp2rdf.core;
023    
024    import com.jamonapi.Monitor;
025    import com.jamonapi.MonitorFactory;
026    import opennlp.tools.util.Span;
027    import org.slf4j.Logger;
028    import org.slf4j.LoggerFactory;
029    
030    import java.util.ArrayList;
031    import java.util.List;
032    
033    /**
034     * User: Sebastian Hellmann - http://bis.informatik.uni-leipzig.de/SebastianHellmann
035     */
036    public class POJOMaker {
037    
038        private static Logger log = LoggerFactory.getLogger(POJOMaker.class);
039    
040        public static List<SentencePOJO> makePOJOs(String text, Tokenizer tokenizer) {
041    
042            List<SentencePOJO> ret = new ArrayList<SentencePOJO>();
043    
044            Span[] sentences = tokenizer.detectSentences(text);
045            for (Span one : sentences) {
046                ret.add(makeOneSentencePOJO(text, one, tokenizer));
047            }
048            return ret;
049        }
050    
051        protected static SentencePOJO makeOneSentencePOJO(String text, Span sentence, Tokenizer tokenizer) {
052    
053            Monitor mon = MonitorFactory.getTimeMonitor("makeOneSentencePOJO").start();
054            //generate the sentence POJO
055            SentencePOJO sentencePOJO = new SentencePOJO();
056            sentencePOJO.setText(sentence.getCoveredText(text).toString());
057            sentencePOJO.setSpan(sentence);
058    
059            List<WordPOJO> wordPOJOList = new ArrayList<WordPOJO>();
060            sentencePOJO.setWordPOJOs(wordPOJOList);
061    
062    
063            //the Tokenizer is not Thread-safe!
064            Span[] words = tokenizer.detectWords(sentencePOJO.getText());
065            for (Span one : words) {
066                WordPOJO wordPOJO = new WordPOJO();
067                wordPOJO.setText(one.getCoveredText(sentencePOJO.getText()).toString());
068                Span absolute = new Span(one, sentence.getStart());
069                wordPOJO.setSpan(absolute);
070    
071                if (log.isTraceEnabled()) {
072                    StringBuilder logging = new StringBuilder();
073                    logging.append("\nword: " + one.getCoveredText(sentencePOJO.getText()));
074                    logging.append("\nabsolute sentence position [start|end]: " + sentence.getStart() + "|" + sentence.getEnd());
075                    logging.append("\nrelative word position in sentence [start|end]: " + one.getStart() + "|" + one.getEnd());
076                    logging.append("\nabsolute word position [start|end]: " + absolute.getStart() + "|" + absolute.getEnd());
077                    log.trace(logging.toString());
078                }
079    
080                wordPOJOList.add(wordPOJO);
081            }
082            mon.stop();
083            log.debug("Finished creating POJOs of sentences (" + sentencePOJO.getWordPOJOs().size() + " words, " + mon.getLastValue() + " ms.): " + sentencePOJO.getText());
084            return sentencePOJO;
085        }
086    
087    }
088